Skip to content

Add basic number formatting #4461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions Lib/test/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,6 @@ def test_nul(self):
testformat("a%sb", ('c\0d',), 'ac\0db')
testcommon(b"a%sb", (b'c\0d',), b'ac\0db')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_non_ascii(self):
testformat("\u20ac=%f", (1.0,), "\u20ac=1.000000")

Expand All @@ -419,8 +417,6 @@ def test_non_ascii(self):
self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007")
self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007")

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_locale(self):
try:
oldloc = locale.setlocale(locale.LC_ALL)
Expand Down Expand Up @@ -466,8 +462,6 @@ def test_optimisations(self):
self.assertIs(text % (), text)
self.assertIs(text.format(), text)

# TODO: RustPython missing complex.__format__ implementation
@unittest.expectedFailure
def test_precision(self):
f = 1.2
self.assertEqual(format(f, ".0f"), "1")
Expand Down
131 changes: 116 additions & 15 deletions common/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use float_ops::Case;
use itertools::{Itertools, PeekingNext};
use num_bigint::{BigInt, Sign};
use num_traits::{cast::ToPrimitive, Signed};
use std::{cmp, str::FromStr};
use std::{cmp::Ordering, str::FromStr};

trait FormatParse {
fn parse(text: &str) -> (Option<Self>, &str)
Expand Down Expand Up @@ -108,6 +108,15 @@ pub enum FormatGrouping {
Underscore,
}

impl FormatGrouping {
fn to_char(&self) -> char {
match self {
Self::Comma => ',',
Self::Underscore => '_',
}
}
}

impl FormatParse for FormatGrouping {
fn parse(text: &str) -> (Option<Self>, &str) {
let mut chars = text.chars();
Expand All @@ -119,6 +128,19 @@ impl FormatParse for FormatGrouping {
}
}

#[derive(Debug)]
#[repr(u8)]
enum PaddingStyle {
Whitespace = b' ',
Zero = b'0',
}

impl From<PaddingStyle> for char {
fn from(from: PaddingStyle) -> char {
from as u8 as char
}
}

#[derive(Debug, PartialEq)]
pub enum FormatType {
String,
Expand Down Expand Up @@ -312,13 +334,15 @@ impl FormatSpec {
inter: i32,
sep: char,
disp_digit_cnt: i32,
padding: PaddingStyle,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't need this padding. It only allows whitespace or 0, but whitespace padding doesn't add seprator.

) -> String {
// Don't add separators to the floating decimal point of numbers
let mut parts = magnitude_str.splitn(2, '.');
let magnitude_int_str = parts.next().unwrap().to_string();
let dec_digit_cnt = magnitude_str.len() as i32 - magnitude_int_str.len() as i32;
let int_digit_cnt = disp_digit_cnt - dec_digit_cnt;
let mut result = FormatSpec::separate_integer(magnitude_int_str, inter, sep, int_digit_cnt);
let mut result =
FormatSpec::separate_integer(magnitude_int_str, inter, sep, int_digit_cnt, padding);
if let Some(part) = parts.next() {
result.push_str(&format!(".{part}"))
}
Expand All @@ -330,6 +354,7 @@ impl FormatSpec {
inter: i32,
sep: char,
disp_digit_cnt: i32,
padding: PaddingStyle,
) -> String {
let magnitude_len = magnitude_str.len() as i32;
let offset = (disp_digit_cnt % (inter + 1) == 0) as i32;
Expand All @@ -338,7 +363,8 @@ impl FormatSpec {
if pad_cnt > 0 {
// separate with 0 padding
let sep_cnt = disp_digit_cnt / (inter + 1);
let padding = "0".repeat((pad_cnt - sep_cnt) as usize);
let padding_char = padding.into();
let padding = FormatSpec::compute_fill_string(padding_char, pad_cnt - sep_cnt);
let padded_num = format!("{padding}{magnitude_str}");
FormatSpec::insert_separator(padded_num, inter, sep, sep_cnt)
} else {
Expand Down Expand Up @@ -384,29 +410,33 @@ impl FormatSpec {

fn get_separator_interval(&self) -> usize {
match self.format_type {
Some(FormatType::Number) => 9999,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this meaning something or just intended a big number? I think bigint can have more digits than 9999.

Some(FormatType::Binary | FormatType::Octal | FormatType::Hex(_)) => 4,
Some(FormatType::Decimal | FormatType::Number | FormatType::FixedPoint(_)) => 3,
Some(FormatType::Decimal | FormatType::FixedPoint(_)) => 3,
None => 3,
_ => panic!("Separators only valid for numbers!"),
}
}

fn add_magnitude_separators(&self, magnitude_str: String, prefix: &str) -> String {
fn add_magnitude_separators(
&self,
magnitude_str: String,
prefix: &str,
padding: PaddingStyle,
) -> String {
match &self.grouping_option {
Some(fg) => {
let sep = match fg {
FormatGrouping::Comma => ',',
FormatGrouping::Underscore => '_',
};
Some(grouping) => {
let sep = grouping.to_char();
let inter = self.get_separator_interval().try_into().unwrap();
let magnitude_len = magnitude_str.len();
let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32;
let disp_digit_cnt = cmp::max(width, magnitude_len as i32);
let disp_digit_cnt = std::cmp::max(width, magnitude_len as i32);
FormatSpec::add_magnitude_separators_for_char(
magnitude_str,
inter,
sep,
disp_digit_cnt,
padding,
)
}
None => magnitude_str,
Expand All @@ -433,7 +463,42 @@ impl FormatSpec {
let ch = char::from(self.format_type.as_ref().unwrap());
Err(FormatSpecError::UnknownFormatCode(ch, "float"))
}
Some(FormatType::Number) => Err(FormatSpecError::NotImplemented('n', "float")),
Some(FormatType::Number) => {
const UPPER_BOUND: f64 = 1e+5;
const LOWER_BOUND: f64 = 1e-4;
const ZERO_SEP: char = '\x00';

let show_as_exponential = magnitude >= UPPER_BOUND || magnitude <= LOWER_BOUND;
let precision = if precision == 0 { 1 } else { precision };
let mut inter = self.get_separator_interval();

let width = self.width.unwrap_or(precision);
let disp_digit_cnt = width;

let magnitude_str: String = if show_as_exponential {
float_ops::format_general(
precision,
magnitude,
Case::Lower,
self.alternate_form,
true,
)
} else {
// TODO: currently, `nan` and `inf` are not work properly. should handle this
if magnitude.is_nan() {
return Ok("nan".to_string());
}
self.check_number_formatting_cases(magnitude, width, precision, &mut inter)
};

Ok(FormatSpec::add_magnitude_separators_for_char(
magnitude_str,
inter as i32,
ZERO_SEP,
disp_digit_cnt as i32,
PaddingStyle::Whitespace,
))
}
Some(FormatType::GeneralFormat(case)) => {
let precision = if precision == 0 { 1 } else { precision };
Ok(float_ops::format_general(
Expand Down Expand Up @@ -487,14 +552,49 @@ impl FormatSpec {
FormatSign::MinusOrSpace => " ",
}
};
let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, sign_str);
let magnitude_str =
self.add_magnitude_separators(raw_magnitude_str?, sign_str, PaddingStyle::Zero);
self.format_sign_and_align(
unsafe { &BorrowedStr::from_ascii_unchecked(magnitude_str.as_bytes()) },
sign_str,
FormatAlign::Right,
)
}

fn check_number_formatting_cases(
&self,
magnitude: f64,
width: usize,
precision: usize,
inter: &mut usize,
) -> String {
let magnitude_string = magnitude.to_string();
let (integer, decimal) = magnitude_string
.split_once('.')
.unwrap_or((&magnitude_string, "0"));
let integer_len = integer.len();

match integer_len.cmp(&width) {
Ordering::Less => {
let diff: usize = precision - integer_len;
let mut decimal_string: String = decimal.to_string();
decimal_string.truncate(diff);
// TODO: add round up
*inter += 1;

integer.to_string() + "." + &decimal_string
}
Ordering::Greater => float_ops::format_general(
precision,
magnitude,
Case::Lower,
self.alternate_form,
true,
),
Ordering::Equal => integer.to_string(),
}
}

#[inline]
fn format_int_radix(&self, magnitude: BigInt, radix: u32) -> Result<String, FormatSpecError> {
match self.precision {
Expand Down Expand Up @@ -559,7 +659,8 @@ impl FormatSpec {
},
};
let sign_prefix = format!("{sign_str}{prefix}");
let magnitude_str = self.add_magnitude_separators(raw_magnitude_str, &sign_prefix);
let magnitude_str =
self.add_magnitude_separators(raw_magnitude_str, &sign_prefix, PaddingStyle::Zero);
self.format_sign_and_align(
&BorrowedStr::from_bytes(magnitude_str.as_bytes()),
&sign_prefix,
Expand Down Expand Up @@ -596,7 +697,7 @@ impl FormatSpec {
let num_chars = magnitude_str.char_len();
let fill_char = self.fill.unwrap_or(' ');
let fill_chars_needed: i32 = self.width.map_or(0, |w| {
cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32))
std::cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32))
});
Ok(match align {
FormatAlign::Left => format!(
Expand Down
33 changes: 33 additions & 0 deletions extra_tests/snippets/builtin_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,39 @@ def try_mutate_str():
assert '{:g}'.format(1.020e-13) == '1.02e-13'
assert "{:g}".format(1.020e-4) == '0.000102'

def test_number_formatting():
assert '{:n}'.format(0) == '0'
assert '{:2n}'.format(1) == ' 1'
assert '{:5n}'.format(10) == ' 10'
assert '{:5n}'.format(100) == ' 100'
assert '{:5n}'.format(1000) == ' 1000'
assert '{:5n}'.format(10000) == '10000'
#TODO: should remove trailing zero if the `magnitude` is given by exponential style
assert '{:n}'.format(123.123) == '123.123'
assert '{:2n}'.format(123.123) == '123.123'
assert '{:5n}'.format(123.123) == '123.123'
assert '{:.1n}'.format(1.12345) == '1'
assert '{:.2n}'.format(1.12345) == '1.1'
assert '{:.3n}'.format(1.12345) == '1.12'
assert '{:.4n}'.format(1.12345) == '1.123'
assert '{:.6n}'.format(1.12345) == '1.12345'
assert '{:.1n}'.format(12345.12345) == '1e+04'
assert '{:.2n}'.format(12345.12345) == '1.2e+04'
assert '{:.3n}'.format(12345.12345) == '1.23e+04'
assert '{:.4n}'.format(12345.12345) == '1.235e+04'
assert '{:5n}'.format(1234567.0) == '1.23457e+06'
assert '{:5n}'.format(1234567.1) == '1.23457e+06'
assert '{:.6n}'.format(12345.12345) == '12345.1'
assert '{:.7n}'.format(12345.12345) == '12345.12'
assert '{:.8n}'.format(12345.12345) == '12345.123'
assert '{:.9n}'.format(12345.12345) == '12345.1234'
assert '{:.10n}'.format(12345.12345) == '12345.12345'
assert '{:.11n}'.format(12345.12345) == '12345.12345'

##TODO `nan` and `inf` are not fully supported yet
assert '{:n}'.format(float('nan')) == 'nan'
assert '{:n}'.format(float('-nan')) == 'nan'

# remove*fix test
def test_removeprefix():
s = 'foobarfoo'
Expand Down