Skip to content

Commit 160363f

Browse files
authored
Fix float parsing (#5643)
* Fix float parsing
* Add rustpython_literal::complex
* Don't call .to_string() on a constant
1 parent 0b35946 commit 160363f

File tree

10 files changed

+155
-133
lines changed

10 files changed

+155
-133
lines changed

Cargo.lock

+6-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_float.py

-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ class OtherFloatSubclass(float):
3535

3636
class GeneralFloatCases(unittest.TestCase):
3737

38-
# TODO: RUSTPYTHON
39-
@unittest.expectedFailure
4038
def test_float(self):
4139
self.assertEqual(float(3.14), 3.14)
4240
self.assertEqual(float(314), 314.0)

common/src/str.rs

+43
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,49 @@ macro_rules! ascii {
609609
}
610610
pub use ascii;
611611

612+
// TODO: this should probably live in a crate like unic or unicode-properties
/// Inclusive code-point ranges of the decimal digits recognised by [`char_to_decimal`].
///
/// Invariants relied on by the lookup:
/// * the ranges are sorted by code point and do not overlap;
/// * every range starts at a digit ZERO and covers whole runs of ten consecutive
///   digits (the mathematical digits are five back-to-back runs), so a digit's
///   value is its offset from the start of its range, modulo 10.
///
/// The code points are written numerically rather than as glyph literals: several
/// of these characters (the fullwidth digits in particular) are visually
/// indistinguishable from ASCII digits, and a glyph table silently loses its sort
/// order if an editor or tool normalises them.
const UNICODE_DECIMAL_RANGES: &[(u32, u32)] = &[
    (0x0030, 0x0039),   // ASCII
    (0x0660, 0x0669),   // Arabic-Indic
    (0x06F0, 0x06F9),   // Extended Arabic-Indic
    (0x07C0, 0x07C9),   // NKo
    (0x0966, 0x096F),   // Devanagari
    (0x09E6, 0x09EF),   // Bengali
    (0x0A66, 0x0A6F),   // Gurmukhi
    (0x0AE6, 0x0AEF),   // Gujarati
    (0x0B66, 0x0B6F),   // Oriya
    (0x0BE6, 0x0BEF),   // Tamil
    (0x0C66, 0x0C6F),   // Telugu
    (0x0CE6, 0x0CEF),   // Kannada
    (0x0D66, 0x0D6F),   // Malayalam
    (0x0DE6, 0x0DEF),   // Sinhala Lith
    (0x0E50, 0x0E59),   // Thai
    (0x0ED0, 0x0ED9),   // Lao
    (0x0F20, 0x0F29),   // Tibetan
    (0x1040, 0x1049),   // Myanmar
    (0x1090, 0x1099),   // Myanmar Shan
    (0x17E0, 0x17E9),   // Khmer
    (0x1810, 0x1819),   // Mongolian
    (0x1946, 0x194F),   // Limbu
    (0x19D0, 0x19D9),   // New Tai Lue
    (0x1A80, 0x1A89),   // Tai Tham Hora
    (0x1A90, 0x1A99),   // Tai Tham Tham
    (0x1B50, 0x1B59),   // Balinese
    (0x1BB0, 0x1BB9),   // Sundanese
    (0x1C40, 0x1C49),   // Lepcha
    (0x1C50, 0x1C59),   // Ol Chiki
    (0xA620, 0xA629),   // Vai
    (0xA8D0, 0xA8D9),   // Saurashtra
    (0xA900, 0xA909),   // Kayah Li
    (0xA9D0, 0xA9D9),   // Javanese
    (0xA9F0, 0xA9F9),   // Myanmar Tai Laing
    (0xAA50, 0xAA59),   // Cham
    (0xABF0, 0xABF9),   // Meetei Mayek
    (0xFF10, 0xFF19),   // Fullwidth
    (0x104A0, 0x104A9), // Osmanya
    (0x11066, 0x1106F), // Brahmi
    (0x110F0, 0x110F9), // Sora Sompeng
    (0x11136, 0x1113F), // Chakma
    (0x111D0, 0x111D9), // Sharada
    (0x112F0, 0x112F9), // Khudawadi
    (0x11450, 0x11459), // Newa
    (0x114D0, 0x114D9), // Tirhuta
    (0x11650, 0x11659), // Modi
    (0x116C0, 0x116C9), // Takri
    (0x11730, 0x11739), // Ahom
    (0x118E0, 0x118E9), // Warang Citi
    (0x11C50, 0x11C59), // Bhaiksuki
    (0x11D50, 0x11D59), // Masaram Gondi
    (0x16A60, 0x16A69), // Mro
    (0x16B50, 0x16B59), // Pahawh Hmong
    (0x1D7CE, 0x1D7FF), // Mathematical digits (five styles, 50 code points)
    (0x1E950, 0x1E959), // Adlam
];

/// Return the numeric value (0..=9) of `ch` if it is one of the decimal digits
/// listed in `UNICODE_DECIMAL_RANGES`, or `None` for any other character.
pub fn char_to_decimal(ch: char) -> Option<u8> {
    let cp = u32::from(ch);
    // Index of the first range whose upper bound is not below `cp`; because the
    // table is sorted and non-overlapping this is the only range that can hold it.
    let idx = UNICODE_DECIMAL_RANGES.partition_point(|&(_, last)| last < cp);
    let &(first, _) = UNICODE_DECIMAL_RANGES.get(idx)?;
    if cp < first {
        // `cp` falls in the gap before the candidate range.
        return None;
    }
    // `% 10` keeps the value in 0..=9, so the narrowing cast cannot truncate.
    Some(((cp - first) % 10) as u8)
}
654+
612655
#[cfg(test)]
613656
mod tests {
614657
use super::*;

compiler/codegen/src/unparse.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ impl<'a, 'b, 'c> Unparser<'a, 'b, 'c> {
366366
}
367367
}
368368
&ruff::Number::Complex { real, imag } => self
369-
.p(&rustpython_literal::float::complex_to_string(real, imag)
369+
.p(&rustpython_literal::complex::to_string(real, imag)
370370
.replace("inf", inf_str))?,
371371
}
372372
}

compiler/literal/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ rustpython-wtf8 = { workspace = true }
1313

1414
hexf-parse = "0.2.1"
1515
is-macro.workspace = true
16-
lexical-parse-float = { version = "0.8.0", features = ["format"] }
16+
lexical-parse-float = { version = "1.0.4", features = ["format"] }
1717
num-traits = { workspace = true }
1818
unic-ucd-category = { workspace = true }
1919

compiler/literal/src/complex.rs

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use crate::float;
2+
3+
/// Convert a complex number to a string.
4+
pub fn to_string(re: f64, im: f64) -> String {
5+
// integer => drop ., fractional => float_ops
6+
let mut im_part = if im.fract() == 0.0 {
7+
im.to_string()
8+
} else {
9+
float::to_string(im)
10+
};
11+
im_part.push('j');
12+
13+
// positive empty => return im_part, integer => drop ., fractional => float_ops
14+
let re_part = if re == 0.0 {
15+
if re.is_sign_positive() {
16+
return im_part;
17+
} else {
18+
"-0".to_owned()
19+
}
20+
} else if re.fract() == 0.0 {
21+
re.to_string()
22+
} else {
23+
float::to_string(re)
24+
};
25+
let mut result =
26+
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
27+
result.push('(');
28+
result.push_str(&re_part);
29+
if im.is_sign_positive() || im.is_nan() {
30+
result.push('+');
31+
}
32+
result.push_str(&im_part);
33+
result.push(')');
34+
result
35+
}
36+
37+
/// Parse a complex number from a string.
38+
///
39+
/// Returns `Some((re, im))` on success.
40+
pub fn parse_str(s: &str) -> Option<(f64, f64)> {
41+
let s = s.trim();
42+
// Handle parentheses
43+
let s = match s.strip_prefix('(') {
44+
None => s,
45+
Some(s) => s.strip_suffix(')')?.trim(),
46+
};
47+
48+
let value = match s.strip_suffix(|c| c == 'j' || c == 'J') {
49+
None => (float::parse_str(s)?, 0.0),
50+
Some(mut s) => {
51+
let mut real = 0.0;
52+
// Find the central +/- operator. If it exists, parse the real part.
53+
for (i, w) in s.as_bytes().windows(2).enumerate() {
54+
if (w[1] == b'+' || w[1] == b'-') && !(w[0] == b'e' || w[0] == b'E') {
55+
real = float::parse_str(&s[..=i])?;
56+
s = &s[i + 1..];
57+
break;
58+
}
59+
}
60+
61+
let imag = match s {
62+
// "j", "+j"
63+
"" | "+" => 1.0,
64+
// "-j"
65+
"-" => -1.0,
66+
s => float::parse_str(s)?,
67+
};
68+
69+
(real, imag)
70+
}
71+
};
72+
Some(value)
73+
}

compiler/literal/src/float.rs

+2-80
Original file line numberDiff line numberDiff line change
@@ -6,65 +6,20 @@ pub fn parse_str(literal: &str) -> Option<f64> {
66
parse_inner(literal.trim().as_bytes())
77
}
88

9-
fn strip_underlines(literal: &[u8]) -> Option<Vec<u8>> {
10-
let mut prev = b'\0';
11-
let mut dup = Vec::<u8>::new();
12-
for p in literal {
13-
if *p == b'_' {
14-
// Underscores are only allowed after digits.
15-
if !prev.is_ascii_digit() {
16-
return None;
17-
}
18-
} else {
19-
dup.push(*p);
20-
// Underscores are only allowed before digits.
21-
if prev == b'_' && !p.is_ascii_digit() {
22-
return None;
23-
}
24-
}
25-
prev = *p;
26-
}
27-
28-
// Underscores are not allowed at the end.
29-
if prev == b'_' {
30-
return None;
31-
}
32-
33-
Some(dup)
34-
}
35-
369
pub fn parse_bytes(literal: &[u8]) -> Option<f64> {
37-
parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace()))
38-
}
39-
40-
fn trim_slice<T>(v: &[T], mut trim: impl FnMut(&T) -> bool) -> &[T] {
41-
let mut it = v.iter();
42-
// it.take_while_ref(&mut trim).for_each(drop);
43-
// hmm.. `&mut slice::Iter<_>` is not `Clone`
44-
// it.by_ref().rev().take_while_ref(&mut trim).for_each(drop);
45-
while it.clone().next().is_some_and(&mut trim) {
46-
it.next();
47-
}
48-
while it.clone().next_back().is_some_and(&mut trim) {
49-
it.next_back();
50-
}
51-
it.as_slice()
10+
parse_inner(literal.trim_ascii())
5211
}
5312

5413
fn parse_inner(literal: &[u8]) -> Option<f64> {
5514
use lexical_parse_float::{
5615
FromLexicalWithOptions, NumberFormatBuilder, Options, format::PYTHON3_LITERAL,
5716
};
5817

59-
// Use custom function for underline handling for now.
60-
// For further information see https://github.com/Alexhuszagh/rust-lexical/issues/96.
61-
let stripped = strip_underlines(literal)?;
62-
6318
// lexical-core's format::PYTHON_STRING is inaccurate
6419
const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL)
6520
.no_special(false)
6621
.build();
67-
f64::from_lexical_with_options::<PYTHON_STRING>(&stripped, &Options::new()).ok()
22+
f64::from_lexical_with_options::<PYTHON_STRING>(literal, &Options::new()).ok()
6823
}
6924

7025
pub fn is_integer(v: f64) -> bool {
@@ -223,39 +178,6 @@ pub fn to_string(value: f64) -> String {
223178
}
224179
}
225180

226-
pub fn complex_to_string(re: f64, im: f64) -> String {
227-
// integer => drop ., fractional => float_ops
228-
let mut im_part = if im.fract() == 0.0 {
229-
im.to_string()
230-
} else {
231-
to_string(im)
232-
};
233-
im_part.push('j');
234-
235-
// positive empty => return im_part, integer => drop ., fractional => float_ops
236-
let re_part = if re == 0.0 {
237-
if re.is_sign_positive() {
238-
return im_part;
239-
} else {
240-
re.to_string()
241-
}
242-
} else if re.fract() == 0.0 {
243-
re.to_string()
244-
} else {
245-
to_string(re)
246-
};
247-
let mut result =
248-
String::with_capacity(re_part.len() + im_part.len() + 2 + im.is_sign_positive() as usize);
249-
result.push('(');
250-
result.push_str(&re_part);
251-
if im.is_sign_positive() || im.is_nan() {
252-
result.push('+');
253-
}
254-
result.push_str(&im_part);
255-
result.push(')');
256-
result
257-
}
258-
259181
pub fn from_hex(s: &str) -> Option<f64> {
260182
if let Ok(f) = hexf_parse::parse_hexf64(s, false) {
261183
return Some(f);

compiler/literal/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod char;
2+
pub mod complex;
23
pub mod escape;
34
pub mod float;
45
pub mod format;

vm/src/builtins/complex.rs

+4-41
Original file line numberDiff line numberDiff line change
@@ -179,13 +179,13 @@ impl Constructor for PyComplex {
179179
"complex() can't take second arg if first is a string".to_owned(),
180180
));
181181
}
182-
let value = s
182+
let (re, im) = s
183183
.to_str()
184-
.and_then(|s| parse_str(s.trim()))
184+
.and_then(rustpython_literal::complex::parse_str)
185185
.ok_or_else(|| {
186186
vm.new_value_error("complex() arg is a malformed string".to_owned())
187187
})?;
188-
return Self::from(value)
188+
return Self::from(Complex64 { re, im })
189189
.into_ref_with_type(vm, cls)
190190
.map(Into::into);
191191
} else {
@@ -494,7 +494,7 @@ impl Representable for PyComplex {
494494
// TODO: when you fix this, move it to rustpython_common::complex::repr and update
495495
// ast/src/unparse.rs + impl Display for Constant in ast/src/constant.rs
496496
let Complex64 { re, im } = zelf.value;
497-
Ok(rustpython_literal::float::complex_to_string(re, im))
497+
Ok(rustpython_literal::complex::to_string(re, im))
498498
}
499499
}
500500

@@ -519,40 +519,3 @@ pub struct ComplexArgs {
519519
#[pyarg(any, optional)]
520520
imag: OptionalArg<PyObjectRef>,
521521
}
522-
523-
fn parse_str(s: &str) -> Option<Complex64> {
524-
// Handle parentheses
525-
let s = match s.strip_prefix('(') {
526-
None => s,
527-
Some(s) => match s.strip_suffix(')') {
528-
None => return None,
529-
Some(s) => s.trim(),
530-
},
531-
};
532-
533-
let value = match s.strip_suffix(|c| c == 'j' || c == 'J') {
534-
None => Complex64::new(crate::literal::float::parse_str(s)?, 0.0),
535-
Some(mut s) => {
536-
let mut real = 0.0;
537-
// Find the central +/- operator. If it exists, parse the real part.
538-
for (i, w) in s.as_bytes().windows(2).enumerate() {
539-
if (w[1] == b'+' || w[1] == b'-') && !(w[0] == b'e' || w[0] == b'E') {
540-
real = crate::literal::float::parse_str(&s[..=i])?;
541-
s = &s[i + 1..];
542-
break;
543-
}
544-
}
545-
546-
let imag = match s {
547-
// "j", "+j"
548-
"" | "+" => 1.0,
549-
// "-j"
550-
"-" => -1.0,
551-
s => crate::literal::float::parse_str(s)?,
552-
};
553-
554-
Complex64::new(real, imag)
555-
}
556-
};
557-
Some(value)
558-
}

0 commit comments

Comments
 (0)