Skip to content

Fix float parsing #5643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions Lib/test/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ class OtherFloatSubclass(float):

class GeneralFloatCases(unittest.TestCase):

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_float(self):
self.assertEqual(float(3.14), 3.14)
self.assertEqual(float(314), 314.0)
Expand Down
43 changes: 43 additions & 0 deletions common/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,49 @@ macro_rules! ascii {
}
pub use ascii;

// TODO: this should probably live in a crate like unic or unicode-properties
/// Code point of the zero digit of every run of ten consecutive Unicode decimal digits
/// recognised by [`char_to_decimal`], in ascending order.
///
/// Each entry `z` stands for the characters `z..=z + 9`, whose values are 0 through 9.
/// Code points are written numerically instead of as literal glyphs so that look-alike
/// characters (for example the fullwidth digits starting at U+FF10) cannot be turned into
/// ASCII by an editor or normalising tool, which would silently break the ordering the
/// lookup depends on.
const DECIMAL_DIGIT_ZEROS: &[u32] = &[
    0x0030,  // ASCII
    0x0660,  // Arabic-Indic
    0x06F0,  // Extended Arabic-Indic
    0x07C0,  // NKo
    0x0966,  // Devanagari
    0x09E6,  // Bengali
    0x0A66,  // Gurmukhi
    0x0AE6,  // Gujarati
    0x0B66,  // Oriya
    0x0BE6,  // Tamil
    0x0C66,  // Telugu
    0x0CE6,  // Kannada
    0x0D66,  // Malayalam
    0x0DE6,  // Sinhala Lith
    0x0E50,  // Thai
    0x0ED0,  // Lao
    0x0F20,  // Tibetan
    0x1040,  // Myanmar
    0x1090,  // Myanmar Shan
    0x17E0,  // Khmer
    0x1810,  // Mongolian
    0x1946,  // Limbu
    0x19D0,  // New Tai Lue
    0x1A80,  // Tai Tham Hora
    0x1A90,  // Tai Tham Tham
    0x1B50,  // Balinese
    0x1BB0,  // Sundanese
    0x1C40,  // Lepcha
    0x1C50,  // Ol Chiki
    0xA620,  // Vai
    0xA8D0,  // Saurashtra
    0xA900,  // Kayah Li
    0xA9D0,  // Javanese
    0xA9F0,  // Myanmar Tai Laing
    0xAA50,  // Cham
    0xABF0,  // Meetei Mayek
    0xFF10,  // Fullwidth
    0x104A0, // Osmanya
    0x11066, // Brahmi
    0x110F0, // Sora Sompeng
    0x11136, // Chakma
    0x111D0, // Sharada
    0x112F0, // Khudawadi
    0x11450, // Newa
    0x114D0, // Tirhuta
    0x11650, // Modi
    0x116C0, // Takri
    0x11730, // Ahom
    0x118E0, // Warang Citi
    0x11C50, // Bhaiksuki
    0x11D50, // Masaram Gondi
    0x16A60, // Mro
    0x16B50, // Pahawh Hmong
    0x1D7CE, // Mathematical bold
    0x1D7D8, // Mathematical double-struck
    0x1D7E2, // Mathematical sans-serif
    0x1D7EC, // Mathematical sans-serif bold
    0x1D7F6, // Mathematical monospace
    0x1E950, // Adlam
];

/// Returns the decimal value (0 through 9) of `ch`.
///
/// Yields `None` when `ch` does not belong to any of the digit runs listed in
/// [`DECIMAL_DIGIT_ZEROS`].
pub fn char_to_decimal(ch: char) -> Option<u8> {
    let code = u32::from(ch);
    // Count the zero digits at or below `code`; the only run that can contain `ch` is the
    // last of them. `checked_sub` bails out for characters below the first run.
    let runs_at_or_below = DECIMAL_DIGIT_ZEROS.partition_point(|&zero| zero <= code);
    let zero = *DECIMAL_DIGIT_ZEROS.get(runs_at_or_below.checked_sub(1)?)?;
    // Offsets of ten or more lie in the gap after the run, so they are not digits.
    u8::try_from(code - zero).ok().filter(|&digit| digit < 10)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion compiler/codegen/src/unparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ impl<'a, 'b, 'c> Unparser<'a, 'b, 'c> {
}
}
&ruff::Number::Complex { real, imag } => self
.p(&rustpython_literal::float::complex_to_string(real, imag)
.p(&rustpython_literal::complex::to_string(real, imag)
.replace("inf", inf_str))?,
}
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/literal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ rustpython-wtf8 = { workspace = true }

hexf-parse = "0.2.1"
is-macro.workspace = true
lexical-parse-float = { version = "0.8.0", features = ["format"] }
lexical-parse-float = { version = "1.0.4", features = ["format"] }
num-traits = { workspace = true }
unic-ucd-category = { workspace = true }

Expand Down
73 changes: 73 additions & 0 deletions compiler/literal/src/complex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use crate::float;

/// Convert a complex number to a string.
pub fn to_string(re: f64, im: f64) -> String {
// integer => drop ., fractional => float_ops
let mut im_part = if im.fract() == 0.0 {
im.to_string()
} else {
float::to_string(im)
};
im_part.push('j');

// positive empty => return im_part, integer => drop ., fractional => float_ops
let re_part = if re == 0.0 {
if re.is_sign_positive() {
return im_part;
} else {
"-0".to_owned()
}
} else if re.fract() == 0.0 {
re.to_string()
} else {
float::to_string(re)
};
let mut result =
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
result.push('(');
result.push_str(&re_part);
if im.is_sign_positive() || im.is_nan() {
result.push('+');
}
result.push_str(&im_part);
result.push(')');
result
}

/// Parse the textual form of a complex number.
///
/// Surrounding whitespace is ignored and the text may be wrapped in one pair of
/// parentheses. Returns `Some((re, im))` on success and `None` when the text is malformed.
pub fn parse_str(s: &str) -> Option<(f64, f64)> {
    let trimmed = s.trim();
    // An opening parenthesis requires a matching closing one; the inside is trimmed again.
    let body = if let Some(inner) = trimmed.strip_prefix('(') {
        inner.strip_suffix(')')?.trim()
    } else {
        trimmed
    };

    // Without a trailing `j`/`J` the whole text is a plain real number.
    let Some(unsuffixed) = body.strip_suffix(|c: char| matches!(c, 'j' | 'J')) else {
        return float::parse_str(body).map(|re| (re, 0.0));
    };

    // Locate the first sign that is preceded by some byte other than an exponent marker:
    // that sign separates the real part from the imaginary part.
    let separator = unsuffixed
        .as_bytes()
        .windows(2)
        .position(|pair| matches!(pair[1], b'+' | b'-') && !matches!(pair[0], b'e' | b'E'));

    let (re, imag_text) = match separator {
        Some(i) => (float::parse_str(&unsuffixed[..=i])?, &unsuffixed[i + 1..]),
        None => (0.0, unsuffixed),
    };

    let im = match imag_text {
        // "j" and "+j" mean one imaginary unit.
        "" | "+" => 1.0,
        // "-j" means minus one imaginary unit.
        "-" => -1.0,
        digits => float::parse_str(digits)?,
    };

    Some((re, im))
}
82 changes: 2 additions & 80 deletions compiler/literal/src/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,65 +6,20 @@ pub fn parse_str(literal: &str) -> Option<f64> {
parse_inner(literal.trim().as_bytes())
}

/// Copy `literal` without its `_` separators, validating their placement on the way.
///
/// Returns `None` when an underscore is not directly between two ASCII digits, i.e. when it
/// is leading, trailing, doubled, or adjacent to any non-digit byte.
fn strip_underlines(literal: &[u8]) -> Option<Vec<u8>> {
    let mut cleaned = Vec::with_capacity(literal.len());
    // Previously seen byte; NUL stands for "nothing seen yet" and is not a digit.
    let mut last = b'\0';

    for &byte in literal {
        match byte {
            // An underscore must follow a digit.
            b'_' if !last.is_ascii_digit() => return None,
            b'_' => {}
            // Whatever follows an underscore must be a digit.
            _ if last == b'_' && !byte.is_ascii_digit() => return None,
            _ => cleaned.push(byte),
        }
        last = byte;
    }

    // A trailing underscore has no digit after it.
    (last != b'_').then_some(cleaned)
}

pub fn parse_bytes(literal: &[u8]) -> Option<f64> {
parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace()))
}

fn trim_slice<T>(v: &[T], mut trim: impl FnMut(&T) -> bool) -> &[T] {
let mut it = v.iter();
// it.take_while_ref(&mut trim).for_each(drop);
// hmm.. `&mut slice::Iter<_>` is not `Clone`
// it.by_ref().rev().take_while_ref(&mut trim).for_each(drop);
while it.clone().next().is_some_and(&mut trim) {
it.next();
}
while it.clone().next_back().is_some_and(&mut trim) {
it.next_back();
}
it.as_slice()
parse_inner(literal.trim_ascii())
}

fn parse_inner(literal: &[u8]) -> Option<f64> {
use lexical_parse_float::{
FromLexicalWithOptions, NumberFormatBuilder, Options, format::PYTHON3_LITERAL,
};

// Use custom function for underline handling for now.
// For further information see https://github.com/Alexhuszagh/rust-lexical/issues/96.
let stripped = strip_underlines(literal)?;

// lexical-core's format::PYTHON_STRING is inaccurate
const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL)
.no_special(false)
.build();
f64::from_lexical_with_options::<PYTHON_STRING>(&stripped, &Options::new()).ok()
f64::from_lexical_with_options::<PYTHON_STRING>(literal, &Options::new()).ok()
}

pub fn is_integer(v: f64) -> bool {
Expand Down Expand Up @@ -223,39 +178,6 @@ pub fn to_string(value: f64) -> String {
}
}

pub fn complex_to_string(re: f64, im: f64) -> String {
// integer => drop ., fractional => float_ops
let mut im_part = if im.fract() == 0.0 {
im.to_string()
} else {
to_string(im)
};
im_part.push('j');

// positive empty => return im_part, integer => drop ., fractional => float_ops
let re_part = if re == 0.0 {
if re.is_sign_positive() {
return im_part;
} else {
re.to_string()
}
} else if re.fract() == 0.0 {
re.to_string()
} else {
to_string(re)
};
let mut result =
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
result.push('(');
result.push_str(&re_part);
if im.is_sign_positive() || im.is_nan() {
result.push('+');
}
result.push_str(&im_part);
result.push(')');
result
}

pub fn from_hex(s: &str) -> Option<f64> {
if let Ok(f) = hexf_parse::parse_hexf64(s, false) {
return Some(f);
Expand Down
1 change: 1 addition & 0 deletions compiler/literal/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod char;
pub mod complex;
pub mod escape;
pub mod float;
pub mod format;
45 changes: 4 additions & 41 deletions vm/src/builtins/complex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,13 @@ impl Constructor for PyComplex {
"complex() can't take second arg if first is a string".to_owned(),
));
}
let value = s
let (re, im) = s
.to_str()
.and_then(|s| parse_str(s.trim()))
.and_then(rustpython_literal::complex::parse_str)
.ok_or_else(|| {
vm.new_value_error("complex() arg is a malformed string".to_owned())
})?;
return Self::from(value)
return Self::from(Complex64 { re, im })
.into_ref_with_type(vm, cls)
.map(Into::into);
} else {
Expand Down Expand Up @@ -494,7 +494,7 @@ impl Representable for PyComplex {
// TODO: when you fix this, move it to rustpython_common::complex::repr and update
// ast/src/unparse.rs + impl Display for Constant in ast/src/constant.rs
let Complex64 { re, im } = zelf.value;
Ok(rustpython_literal::float::complex_to_string(re, im))
Ok(rustpython_literal::complex::to_string(re, im))
}
}

Expand All @@ -519,40 +519,3 @@ pub struct ComplexArgs {
#[pyarg(any, optional)]
imag: OptionalArg<PyObjectRef>,
}

/// Parse the textual form of a complex number into a `Complex64`.
///
/// The text may be wrapped in one pair of parentheses. Returns `None` when it is malformed.
/// The outer text is not trimmed here.
fn parse_str(s: &str) -> Option<Complex64> {
    // An opening parenthesis requires a matching closing one; the inside is then trimmed.
    let s = if let Some(inner) = s.strip_prefix('(') {
        inner.strip_suffix(')')?.trim()
    } else {
        s
    };

    // Without a trailing `j`/`J` the text is a plain real number.
    let Some(mut rest) = s.strip_suffix(|c: char| matches!(c, 'j' | 'J')) else {
        let real = crate::literal::float::parse_str(s)?;
        return Some(Complex64::new(real, 0.0));
    };

    // The first sign that does not directly follow an exponent marker (and is not the very
    // first byte) separates the real part from the imaginary part.
    let split_at = rest.as_bytes().windows(2).position(|pair| {
        let (before, sign) = (pair[0], pair[1]);
        matches!(sign, b'+' | b'-') && !matches!(before, b'e' | b'E')
    });

    let mut real = 0.0;
    if let Some(i) = split_at {
        real = crate::literal::float::parse_str(&rest[..=i])?;
        rest = &rest[i + 1..];
    }

    let imag = match rest {
        // "j" and "+j" mean one imaginary unit.
        "" | "+" => 1.0,
        // "-j" means minus one imaginary unit.
        "-" => -1.0,
        digits => crate::literal::float::parse_str(digits)?,
    };

    Some(Complex64::new(real, imag))
}
Loading
Loading