From 64ddbe483aef9db9e7d56102439474b9385886df Mon Sep 17 00:00:00 2001 From: not_joon Date: Thu, 19 Jan 2023 21:49:22 +0900 Subject: [PATCH 1/3] implement number formatting --- Lib/test/test_format.py | 2 +- common/src/format.rs | 114 ++++++++++++++++++++++++++-- extra_tests/snippets/builtin_str.py | 33 ++++++++ 3 files changed, 140 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index b51e9c5bf6..39645324a8 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -466,7 +466,7 @@ def test_optimisations(self): self.assertIs(text % (), text) self.assertIs(text.format(), text) - # TODO: RustPython missing complex.__format__ implementation + # TODO: RUSTPYTHON @unittest.expectedFailure def test_precision(self): f = 1.2 diff --git a/common/src/format.rs b/common/src/format.rs index ca994c6dbe..1c1dd29f8b 100644 --- a/common/src/format.rs +++ b/common/src/format.rs @@ -3,7 +3,10 @@ use float_ops::Case; use itertools::{Itertools, PeekingNext}; use num_bigint::{BigInt, Sign}; use num_traits::{cast::ToPrimitive, Signed}; -use std::{cmp, str::FromStr}; +use std::{ + cmp::{self, Ordering}, + str::FromStr, +}; trait FormatParse { fn parse(text: &str) -> (Option, &str) @@ -119,6 +122,19 @@ impl FormatParse for FormatGrouping { } } +#[derive(Debug)] +#[repr(u8)] +enum PaddingStyle { + Whitespace = b' ', + Zero = b'0', +} + +impl From for char { + fn from(from: PaddingStyle) -> char { + from as u8 as char + } +} + #[derive(Debug, PartialEq)] pub enum FormatType { String, @@ -312,13 +328,15 @@ impl FormatSpec { inter: i32, sep: char, disp_digit_cnt: i32, + padding: PaddingStyle, ) -> String { // Don't add separators to the floating decimal point of numbers let mut parts = magnitude_str.splitn(2, '.'); let magnitude_int_str = parts.next().unwrap().to_string(); let dec_digit_cnt = magnitude_str.len() as i32 - magnitude_int_str.len() as i32; let int_digit_cnt = disp_digit_cnt - dec_digit_cnt; - let mut result = FormatSpec::separate_integer(magnitude_int_str, inter, sep, int_digit_cnt); + let mut result = + FormatSpec::separate_integer(magnitude_int_str, inter, sep, int_digit_cnt, padding); if let Some(part) = parts.next() { result.push_str(&format!(".{part}")) } @@ -330,6 +348,7 @@ impl FormatSpec { inter: i32, sep: char, disp_digit_cnt: i32, + padding: PaddingStyle, ) -> String { let magnitude_len = magnitude_str.len() as i32; let offset = (disp_digit_cnt % (inter + 1) == 0) as i32; @@ -338,7 +357,8 @@ impl FormatSpec { if pad_cnt > 0 { // separate with 0 padding let sep_cnt = disp_digit_cnt / (inter + 1); - let padding = "0".repeat((pad_cnt - sep_cnt) as usize); + let padding_char = padding.into(); + let padding = FormatSpec::compute_fill_string(padding_char, pad_cnt - sep_cnt); let padded_num = format!("{padding}{magnitude_str}"); FormatSpec::insert_separator(padded_num, inter, sep, sep_cnt) } else { @@ -384,14 +404,20 @@ impl FormatSpec { fn get_separator_interval(&self) -> usize { match self.format_type { + Some(FormatType::Number) => 9999, Some(FormatType::Binary | FormatType::Octal | FormatType::Hex(_)) => 4, - Some(FormatType::Decimal | FormatType::Number | FormatType::FixedPoint(_)) => 3, + Some(FormatType::Decimal | FormatType::FixedPoint(_)) => 3, None => 3, _ => panic!("Separators only valid for numbers!"), } } - fn add_magnitude_separators(&self, magnitude_str: String, prefix: &str) -> String { + fn add_magnitude_separators( + &self, + magnitude_str: String, + prefix: &str, + padding: PaddingStyle, + ) -> String { match &self.grouping_option { Some(fg) => { let sep = match fg { @@ -407,6 +433,7 @@ impl FormatSpec { inter, sep, disp_digit_cnt, + padding, ) } None => magnitude_str, @@ -433,7 +460,42 @@ impl FormatSpec { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "float")) } - Some(FormatType::Number) => Err(FormatSpecError::NotImplemented('n', "float")), + Some(FormatType::Number) => { + const UPPER_BOUND: f64 = 1e+5; + const LOWER_BOUND: f64 = 1e-4; + const ZERO_SEP: char = '\x00'; + + let show_as_exponential = magnitude >= UPPER_BOUND || magnitude <= LOWER_BOUND; + let precision = if precision == 0 { 1 } else { precision }; + let mut inter = self.get_separator_interval(); + + let width = self.width.unwrap_or(precision); + let disp_digit_cnt = width; + + let magnitude_str: String = if show_as_exponential { + float_ops::format_general( + precision, + magnitude, + Case::Lower, + self.alternate_form, + true, + ) + } else { + // TODO: currently, `nan` and `inf` are not work properly. should handle this + if magnitude.is_nan() { + return Ok("nan".to_string()); + } + self.check_number_formatting_cases(magnitude, width, precision, &mut inter) + }; + + Ok(FormatSpec::add_magnitude_separators_for_char( + magnitude_str, + inter as i32, + ZERO_SEP, + disp_digit_cnt as i32, + PaddingStyle::Whitespace, + )) + } Some(FormatType::GeneralFormat(case)) => { let precision = if precision == 0 { 1 } else { precision }; Ok(float_ops::format_general( @@ -487,7 +549,8 @@ impl FormatSpec { FormatSign::MinusOrSpace => " ", } }; - let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, sign_str); + let magnitude_str = + self.add_magnitude_separators(raw_magnitude_str?, sign_str, PaddingStyle::Zero); self.format_sign_and_align( unsafe { &BorrowedStr::from_ascii_unchecked(magnitude_str.as_bytes()) }, sign_str, @@ -495,6 +558,40 @@ impl FormatSpec { ) } + fn check_number_formatting_cases( + &self, + magnitude: f64, + width: usize, + precision: usize, + inter: &mut usize, + ) -> String { + let magnitude_string = magnitude.to_string(); + let (integer, decimal) = magnitude_string + .split_once('.') + .unwrap_or((&magnitude_string, "0")); + let integer_len = integer.len(); + + match integer_len.cmp(&width) { + Ordering::Less => { + let diff: usize = precision - integer_len; + let mut decimal_string: String = decimal.to_string(); + decimal_string.truncate(diff); + // TODO: add round up + *inter += 1; + + integer.to_string() + "." + &decimal_string + } + Ordering::Greater => float_ops::format_general( + precision, + magnitude, + Case::Lower, + self.alternate_form, + true, + ), + Ordering::Equal => integer.to_string(), + } + } + #[inline] fn format_int_radix(&self, magnitude: BigInt, radix: u32) -> Result { match self.precision { @@ -559,7 +656,8 @@ impl FormatSpec { }, }; let sign_prefix = format!("{sign_str}{prefix}"); - let magnitude_str = self.add_magnitude_separators(raw_magnitude_str, &sign_prefix); + let magnitude_str = + self.add_magnitude_separators(raw_magnitude_str, &sign_prefix, PaddingStyle::Zero); self.format_sign_and_align( &BorrowedStr::from_bytes(magnitude_str.as_bytes()), &sign_prefix, diff --git a/extra_tests/snippets/builtin_str.py b/extra_tests/snippets/builtin_str.py index 12060fc40a..ecf141b945 100644 --- a/extra_tests/snippets/builtin_str.py +++ b/extra_tests/snippets/builtin_str.py @@ -609,6 +609,39 @@ def try_mutate_str(): assert '{:g}'.format(1.020e-13) == '1.02e-13' assert "{:g}".format(1.020e-4) == '0.000102' +def test_number_formatting(): + assert '{:n}'.format(0) == '0' + assert '{:2n}'.format(1) == ' 1' + assert '{:5n}'.format(10) == ' 10' + assert '{:5n}'.format(100) == ' 100' + assert '{:5n}'.format(1000) == ' 1000' + assert '{:5n}'.format(10000) == '10000' + #TODO: should remove trailing zero if the `magnitude` is given by exponential style + assert '{:n}'.format(123.123) == '123.123' + assert '{:2n}'.format(123.123) == '123.123' + assert '{:5n}'.format(123.123) == '123.123' + assert '{:.1n}'.format(1.12345) == '1' + assert '{:.2n}'.format(1.12345) == '1.1' + assert '{:.3n}'.format(1.12345) == '1.12' + assert '{:.4n}'.format(1.12345) == '1.123' + assert '{:.6n}'.format(1.12345) == '1.12345' + assert '{:.1n}'.format(12345.12345) == '1e+04' + assert '{:.2n}'.format(12345.12345) == '1.2e+04' + assert '{:.3n}'.format(12345.12345) == '1.23e+04' + assert '{:.4n}'.format(12345.12345) == '1.235e+04' + assert '{:5n}'.format(1234567.0) == '1.23457e+06' + assert '{:5n}'.format(1234567.1) == '1.23457e+06' + assert '{:.6n}'.format(12345.12345) == '12345.1' + assert '{:.7n}'.format(12345.12345) == '12345.12' + assert '{:.8n}'.format(12345.12345) == '12345.123' + assert '{:.9n}'.format(12345.12345) == '12345.1234' + assert '{:.10n}'.format(12345.12345) == '12345.12345' + assert '{:.11n}'.format(12345.12345) == '12345.12345' + + ##TODO `nan` and `inf` are not fully supported yet + assert '{:n}'.format(float('nan')) == 'nan' + assert '{:n}'.format(float('-nan')) == 'nan' + # remove*fix test def test_removeprefix(): s = 'foobarfoo' From c8994fc995ac69d5b03539ae52e6e658b72fe63c Mon Sep 17 00:00:00 2001 From: not_joon Date: Sat, 4 Mar 2023 14:33:17 +0900 Subject: [PATCH 2/3] remove test decorators --- Lib/test/test_format.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 39645324a8..2dd9ca5254 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -396,8 +396,6 @@ def test_nul(self): testformat("a%sb", ('c\0d',), 'ac\0db') testcommon(b"a%sb", (b'c\0d',), b'ac\0db') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_non_ascii(self): testformat("\u20ac=%f", (1.0,), "\u20ac=1.000000") @@ -419,8 +417,6 @@ def test_non_ascii(self): self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007") self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_locale(self): try: oldloc = locale.setlocale(locale.LC_ALL) @@ -466,8 +462,6 @@ def test_optimisations(self): self.assertIs(text % (), text) self.assertIs(text.format(), text) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_precision(self): f = 1.2 self.assertEqual(format(f, ".0f"), "1") From 468c60f42d27651f9d49d8592bde64d0707caeff Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sat, 4 Mar 2023 14:33:57 +0900 Subject: [PATCH 3/3] little bit clean up --- common/src/format.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/common/src/format.rs b/common/src/format.rs index 1c1dd29f8b..4a7ed643e4 100644 --- a/common/src/format.rs +++ b/common/src/format.rs @@ -3,10 +3,7 @@ use float_ops::Case; use itertools::{Itertools, PeekingNext}; use num_bigint::{BigInt, Sign}; use num_traits::{cast::ToPrimitive, Signed}; -use std::{ - cmp::{self, Ordering}, - str::FromStr, -}; +use std::{cmp::Ordering, str::FromStr}; trait FormatParse { fn parse(text: &str) -> (Option, &str) @@ -111,6 +108,15 @@ pub enum FormatGrouping { Underscore, } +impl FormatGrouping { + fn to_char(&self) -> char { + match self { + Self::Comma => ',', + Self::Underscore => '_', + } + } +} + impl FormatParse for FormatGrouping { fn parse(text: &str) -> (Option, &str) { let mut chars = text.chars(); @@ -419,15 +425,12 @@ impl FormatSpec { padding: PaddingStyle, ) -> String { match &self.grouping_option { - Some(fg) => { - let sep = match fg { - FormatGrouping::Comma => ',', - FormatGrouping::Underscore => '_', - }; + Some(grouping) => { + let sep = grouping.to_char(); let inter = self.get_separator_interval().try_into().unwrap(); let magnitude_len = magnitude_str.len(); let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32; - let disp_digit_cnt = cmp::max(width, magnitude_len as i32); + let disp_digit_cnt = std::cmp::max(width, magnitude_len as i32); FormatSpec::add_magnitude_separators_for_char( magnitude_str, inter, @@ -694,7 +697,7 @@ impl FormatSpec { let num_chars = magnitude_str.char_len(); let fill_char = self.fill.unwrap_or(' '); let fill_chars_needed: i32 = self.width.map_or(0, |w| { - cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32)) + std::cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32)) }); Ok(match align { FormatAlign::Left => format!(