Skip to content

Commit c2e52c2

Browse files
authored
Merge pull request #1889 from RustPython/coolreader18/_json-optimizations
Add the _json module and _json.make_scanner
2 parents c6d3b1b + 316ee37 commit c2e52c2

File tree

7 files changed

+250
-5
lines changed

7 files changed

+250
-5
lines changed

Lib/test/test_json/__init__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
from test import support
77

88
# import json with and without accelerations
9-
cjson = support.import_fresh_module('json', fresh=['_json'])
9+
# XXX RUSTPYTHON: we don't import _json as fresh since the fresh module isn't placed
10+
# into the sys.modules cache, and therefore the vm can't recognize the _json.Scanner class
11+
cjson = support.import_fresh_module('json') #, fresh=['_json'])
1012
pyjson = support.import_fresh_module('json', blocked=['_json'])
1113
# JSONDecodeError is cached inside the _json module
12-
# XXX RustPython TODO: _json module
13-
# cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError
14+
cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError
1415

1516
# create two base classes that will be used by the other tests
1617
class PyTest(unittest.TestCase):
@@ -38,6 +39,7 @@ def test_pyjson(self):
3839
'json.encoder')
3940

4041
class TestCTest(CTest):
42+
@unittest.expectedFailure
4143
def test_cjson(self):
4244
self.assertEqual(self.json.scanner.make_scanner.__module__, '_json')
4345
self.assertEqual(self.json.decoder.scanstring.__module__, '_json')

Lib/test/test_json/test_decode.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from collections import OrderedDict
44
from test.test_json import PyTest, CTest
55

6+
import unittest
7+
68

79
class TestDecode:
810
def test_decimal(self):
@@ -55,6 +57,7 @@ def check_keys_reuse(self, source, loads):
5557
self.assertIs(a, c)
5658
self.assertIs(b, d)
5759

60+
@unittest.skip("TODO: RUSTPYTHON: cache/memoize keys")
5861
def test_keys_reuse(self):
5962
s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
6063
self.check_keys_reuse(s, self.loads)

Lib/test/test_json/test_speedups.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
from test.test_json import CTest
22

3+
import unittest
34

45
class BadBool:
56
def __bool__(self):
67
1/0
78

89

910
class TestSpeedups(CTest):
11+
# TODO: RUSTPYTHON
12+
@unittest.expectedFailure
1013
def test_scanstring(self):
1114
self.assertEqual(self.json.decoder.scanstring.__module__, "_json")
1215
self.assertIs(self.json.decoder.scanstring, self.json.decoder.c_scanstring)
1316

17+
# TODO: RUSTPYTHON
18+
@unittest.expectedFailure
1419
def test_encode_basestring_ascii(self):
1520
self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__,
1621
"_json")
@@ -38,6 +43,7 @@ def test_make_encoder(self):
3843
b"\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75",
3944
None)
4045

46+
@unittest.skip("TODO: RUSTPYTHON, translate the encoder to Rust")
4147
def test_bad_str_encoder(self):
4248
# Issue #31505: There shouldn't be an assertion failure in case
4349
# c_make_encoder() receives a bad encoder() argument.
@@ -59,6 +65,8 @@ def bad_encoder2(*args):
5965
with self.assertRaises(ZeroDivisionError):
6066
enc('spam', 4)
6167

68+
# TODO: RUSTPYTHON, translate the encoder to Rust
69+
@unittest.expectedFailure
6270
def test_bad_bool_args(self):
6371
def test(name):
6472
self.json.encoder.JSONEncoder(**{name: BadBool()}).encode({'a': 1})

vm/src/obj/objiter.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ pub fn new_stop_iteration(vm: &VirtualMachine) -> PyBaseExceptionRef {
8080
let stop_iteration_type = vm.ctx.exceptions.stop_iteration.clone();
8181
vm.new_exception_empty(stop_iteration_type)
8282
}
83+
/// Builds a `StopIteration` exception whose argument list is exactly `[val]`.
///
/// The exception is only constructed here; it is up to the caller to return it
/// as an error.
pub fn stop_iter_with_value(val: PyObjectRef, vm: &VirtualMachine) -> PyBaseExceptionRef {
    vm.new_exception(vm.ctx.exceptions.stop_iteration.clone(), vec![val])
}
8387

8488
pub fn stop_iter_value(vm: &VirtualMachine, exc: &PyBaseExceptionRef) -> PyResult {
8589
let args = exc.args();

vm/src/stdlib/json.rs

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
use crate::obj::objiter;
2+
use crate::obj::objstr::PyStringRef;
3+
use crate::obj::{objbool, objtype::PyClassRef};
4+
use crate::pyobject::{IdProtocol, PyClassImpl, PyObjectRef, PyRef, PyResult, PyValue};
5+
use crate::VirtualMachine;
6+
7+
use num_bigint::BigInt;
8+
use std::str::FromStr;
9+
10+
#[pyclass(name = "Scanner")]
11+
#[derive(Debug)]
12+
struct JsonScanner {
13+
strict: bool,
14+
object_hook: Option<PyObjectRef>,
15+
object_pairs_hook: Option<PyObjectRef>,
16+
parse_float: Option<PyObjectRef>,
17+
parse_int: Option<PyObjectRef>,
18+
parse_constant: PyObjectRef,
19+
ctx: PyObjectRef,
20+
}
21+
22+
impl PyValue for JsonScanner {
23+
fn class(vm: &VirtualMachine) -> PyClassRef {
24+
vm.class("_json", "make_scanner")
25+
}
26+
}
27+
28+
#[pyimpl]
29+
impl JsonScanner {
30+
#[pyslot]
31+
fn tp_new(cls: PyClassRef, ctx: PyObjectRef, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
32+
let strict = objbool::boolval(vm, vm.get_attribute(ctx.clone(), "strict")?)?;
33+
let object_hook = vm.option_if_none(vm.get_attribute(ctx.clone(), "object_hook")?);
34+
let object_pairs_hook =
35+
vm.option_if_none(vm.get_attribute(ctx.clone(), "object_pairs_hook")?);
36+
let parse_float = vm.get_attribute(ctx.clone(), "parse_float")?;
37+
let parse_float = if vm.is_none(&parse_float) || parse_float.is(&vm.ctx.types.float_type) {
38+
None
39+
} else {
40+
Some(parse_float)
41+
};
42+
let parse_int = vm.get_attribute(ctx.clone(), "parse_int")?;
43+
let parse_int = if vm.is_none(&parse_int) || parse_int.is(&vm.ctx.types.int_type) {
44+
None
45+
} else {
46+
Some(parse_int)
47+
};
48+
let parse_constant = vm.get_attribute(ctx.clone(), "parse_constant")?;
49+
50+
Self {
51+
strict,
52+
object_hook,
53+
object_pairs_hook,
54+
parse_float,
55+
parse_int,
56+
parse_constant,
57+
ctx,
58+
}
59+
.into_ref_with_type(vm, cls)
60+
}
61+
62+
fn parse(
63+
&self,
64+
s: &str,
65+
pystr: PyStringRef,
66+
idx: usize,
67+
scan_once: PyObjectRef,
68+
vm: &VirtualMachine,
69+
) -> PyResult {
70+
let c = s
71+
.chars()
72+
.next()
73+
.ok_or_else(|| objiter::stop_iter_with_value(vm.new_int(idx), vm))?;
74+
let next_idx = idx + c.len_utf8();
75+
match c {
76+
'"' => {
77+
// TODO: parse the string in rust
78+
let parse_str = vm.get_attribute(self.ctx.clone(), "parse_string")?;
79+
return vm.invoke(
80+
&parse_str,
81+
vec![
82+
pystr.into_object(),
83+
vm.new_int(next_idx),
84+
vm.new_bool(self.strict),
85+
],
86+
);
87+
}
88+
'{' => {
89+
// TODO: parse the object in rust
90+
let parse_obj = vm.get_attribute(self.ctx.clone(), "parse_object")?;
91+
return vm.invoke(
92+
&parse_obj,
93+
vec![
94+
vm.ctx
95+
.new_tuple(vec![pystr.into_object(), vm.new_int(next_idx)]),
96+
vm.new_bool(self.strict),
97+
scan_once,
98+
self.object_hook.clone().unwrap_or_else(|| vm.get_none()),
99+
self.object_pairs_hook
100+
.clone()
101+
.unwrap_or_else(|| vm.get_none()),
102+
],
103+
);
104+
}
105+
'[' => {
106+
// TODO: parse the array in rust
107+
let parse_array = vm.get_attribute(self.ctx.clone(), "parse_array")?;
108+
return vm.invoke(
109+
&parse_array,
110+
vec![
111+
vm.ctx
112+
.new_tuple(vec![pystr.into_object(), vm.new_int(next_idx)]),
113+
scan_once,
114+
],
115+
);
116+
}
117+
_ => {}
118+
}
119+
120+
macro_rules! parse_const {
121+
($s:literal, $val:expr) => {
122+
if s.starts_with($s) {
123+
return Ok(vm.ctx.new_tuple(vec![$val, vm.new_int(idx + $s.len())]));
124+
}
125+
};
126+
}
127+
128+
parse_const!("null", vm.get_none());
129+
parse_const!("true", vm.new_bool(true));
130+
parse_const!("false", vm.new_bool(false));
131+
132+
if let Some((res, len)) = self.parse_number(s, vm) {
133+
return Ok(vm.ctx.new_tuple(vec![res?, vm.new_int(idx + len)]));
134+
}
135+
136+
macro_rules! parse_constant {
137+
($s:literal) => {
138+
if s.starts_with($s) {
139+
return Ok(vm.ctx.new_tuple(vec![
140+
vm.invoke(&self.parse_constant, vec![vm.new_str($s.to_owned())])?,
141+
vm.new_int(idx + $s.len()),
142+
]));
143+
}
144+
};
145+
}
146+
147+
parse_constant!("NaN");
148+
parse_constant!("Infinity");
149+
parse_constant!("-Infinity");
150+
151+
Err(objiter::stop_iter_with_value(vm.new_int(idx), vm))
152+
}
153+
154+
fn parse_number(&self, s: &str, vm: &VirtualMachine) -> Option<(PyResult, usize)> {
155+
let mut has_neg = false;
156+
let mut has_decimal = false;
157+
let mut has_exponent = false;
158+
let mut has_e_sign = false;
159+
let mut i = 0;
160+
for c in s.chars() {
161+
match c {
162+
'-' if i == 0 => has_neg = true,
163+
n if n.is_ascii_digit() => {}
164+
'.' if !has_decimal => has_decimal = true,
165+
'e' | 'E' if !has_exponent => has_exponent = true,
166+
'+' | '-' if !has_e_sign => has_e_sign = true,
167+
_ => break,
168+
}
169+
i += 1;
170+
}
171+
if i == 0 || (i == 1 && has_neg) {
172+
return None;
173+
}
174+
let buf = &s[..i];
175+
let ret = if has_decimal || has_exponent {
176+
// float
177+
if let Some(ref parse_float) = self.parse_float {
178+
vm.invoke(parse_float, vec![vm.new_str(buf.to_owned())])
179+
} else {
180+
Ok(vm.ctx.new_float(f64::from_str(buf).unwrap()))
181+
}
182+
} else if let Some(ref parse_int) = self.parse_int {
183+
vm.invoke(parse_int, vec![vm.new_str(buf.to_owned())])
184+
} else {
185+
Ok(vm.new_int(BigInt::from_str(buf).unwrap()))
186+
};
187+
Some((ret, buf.len()))
188+
}
189+
190+
#[pyslot]
191+
fn call(zelf: PyRef<Self>, pystr: PyStringRef, idx: isize, vm: &VirtualMachine) -> PyResult {
192+
if idx < 0 {
193+
return Err(vm.new_value_error("idx cannot be negative".to_owned()));
194+
}
195+
let idx = idx as usize;
196+
let mut chars = pystr.as_str().chars();
197+
if idx > 0 {
198+
chars
199+
.nth(idx - 1)
200+
.ok_or_else(|| objiter::stop_iter_with_value(vm.new_int(idx), vm))?;
201+
}
202+
zelf.parse(
203+
chars.as_str(),
204+
pystr.clone(),
205+
idx,
206+
zelf.clone().into_object(),
207+
vm,
208+
)
209+
}
210+
}
211+
212+
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
213+
let ctx = &vm.ctx;
214+
let scanner_cls = JsonScanner::make_class(ctx);
215+
scanner_cls.set_str_attr("__module__", vm.new_str("_json".to_owned()));
216+
py_module!(vm, "_json", {
217+
"make_scanner" => scanner_cls,
218+
})
219+
}

vm/src/stdlib/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ mod hashlib;
1515
mod imp;
1616
pub mod io;
1717
mod itertools;
18+
mod json;
1819
#[cfg(feature = "rustpython-parser")]
1920
mod keyword;
2021
mod marshal;
@@ -71,14 +72,15 @@ pub fn get_module_inits() -> HashMap<String, StdlibInitFunc> {
7172
let mut modules = hashmap! {
7273
"array".to_owned() => Box::new(array::make_module) as StdlibInitFunc,
7374
"binascii".to_owned() => Box::new(binascii::make_module),
74-
"dis".to_owned() => Box::new(dis::make_module),
7575
"_collections".to_owned() => Box::new(collections::make_module),
7676
"_csv".to_owned() => Box::new(csv::make_module),
77-
"_functools".to_owned() => Box::new(functools::make_module),
77+
"dis".to_owned() => Box::new(dis::make_module),
7878
"errno".to_owned() => Box::new(errno::make_module),
79+
"_functools".to_owned() => Box::new(functools::make_module),
7980
"hashlib".to_owned() => Box::new(hashlib::make_module),
8081
"itertools".to_owned() => Box::new(itertools::make_module),
8182
"_io".to_owned() => Box::new(io::make_module),
83+
"_json".to_owned() => Box::new(json::make_module),
8284
"marshal".to_owned() => Box::new(marshal::make_module),
8385
"math".to_owned() => Box::new(math::make_module),
8486
"_operator".to_owned() => Box::new(operator::make_module),

vm/src/vm.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,13 @@ impl VirtualMachine {
582582
pub fn is_none(&self, obj: &PyObjectRef) -> bool {
583583
obj.is(&self.get_none())
584584
}
585+
/// Converts a Python object into an `Option`: Python `None` becomes `None`,
/// and any other object is returned unchanged inside `Some`.
pub fn option_if_none(&self, obj: PyObjectRef) -> Option<PyObjectRef> {
    Some(obj).filter(|candidate| !self.is_none(candidate))
}
585592

586593
pub fn get_type(&self) -> PyClassRef {
587594
self.ctx.type_type()

0 commit comments

Comments
 (0)