Skip to content

Commit 0fb79e1

Browse files
committed
Implement _json.encode_basestring{,_ascii}
1 parent c2bbacf commit 0fb79e1

File tree

5 files changed

+120
-10
lines changed

5 files changed

+120
-10
lines changed

Lib/test/test_json/test_speedups.py

-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ def test_scanstring(self):
1414
self.assertEqual(self.json.decoder.scanstring.__module__, "_json")
1515
self.assertIs(self.json.decoder.scanstring, self.json.decoder.c_scanstring)
1616

17-
# TODO: RUSTPYTHON
18-
@unittest.expectedFailure
1917
def test_encode_basestring_ascii(self):
2018
self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__,
2119
"_json")

vm/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ libz-sys = "1.0"
120120
winreg = "0.7"
121121
schannel = "0.1"
122122

123-
[target."cfg(windows)".dependencies.winapi]
123+
[target.'cfg(windows)'.dependencies.winapi]
124124
version = "0.3"
125125
features = ["winsock2", "handleapi", "ws2def", "std", "winbase", "wincrypt", "fileapi"]
126126

vm/src/stdlib/json.rs

+21
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ use crate::VirtualMachine;
77
use num_bigint::BigInt;
88
use std::str::FromStr;
99

10+
mod machinery;
11+
1012
#[pyclass(name = "Scanner")]
1113
#[derive(Debug)]
1214
struct JsonScanner {
@@ -209,11 +211,30 @@ impl JsonScanner {
209211
}
210212
}
211213

214+
fn encode_string(s: &str, ascii_only: bool) -> String {
215+
let mut buf = Vec::<u8>::with_capacity(s.len() + 2);
216+
machinery::write_json_string(s, ascii_only, &mut buf)
217+
// writing to a vec can't fail
218+
.unwrap_or_else(|_| unsafe { std::hint::unreachable_unchecked() });
219+
// TODO: verify that the implementation is correct enough to use `from_utf8_unchecked`
220+
String::from_utf8(buf).expect("invalid utf-8 in json output")
221+
}
222+
223+
fn _json_encode_basestring(s: PyStringRef) -> String {
224+
encode_string(s.as_str(), false)
225+
}
226+
227+
fn _json_encode_basestring_ascii(s: PyStringRef) -> String {
228+
encode_string(s.as_str(), true)
229+
}
230+
212231
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
213232
let ctx = &vm.ctx;
214233
let scanner_cls = JsonScanner::make_class(ctx);
215234
scanner_cls.set_str_attr("__module__", vm.new_str("_json".to_owned()));
216235
py_module!(vm, "_json", {
217236
"make_scanner" => scanner_cls,
237+
"encode_basestring" => named_function!(ctx, _json, encode_basestring),
238+
"encode_basestring_ascii" => named_function!(ctx, _json, encode_basestring_ascii),
218239
})
219240
}

vm/src/stdlib/json/machinery.rs

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// derived from https://github.com/lovasoa/json_in_type
2+
3+
// BSD 2-Clause License
4+
//
5+
// Copyright (c) 2018, Ophir LOJKINE
6+
// All rights reserved.
7+
//
8+
// Redistribution and use in source and binary forms, with or without
9+
// modification, are permitted provided that the following conditions are met:
10+
//
11+
// * Redistributions of source code must retain the above copyright notice, this
12+
// list of conditions and the following disclaimer.
13+
//
14+
// * Redistributions in binary form must reproduce the above copyright notice,
15+
// this list of conditions and the following disclaimer in the documentation
16+
// and/or other materials provided with the distribution.
17+
//
18+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
29+
use std::io;
30+
31+
/// Escape sequences for the control characters 0x00..=0x1F, indexed by byte value.
/// The short forms (`\b`, `\t`, `\n`, `\f`, `\r`) are used where JSON defines them.
static ESCAPE_CHARS: [&[u8]; 0x20] = [
    b"\\u0000", b"\\u0001", b"\\u0002", b"\\u0003", b"\\u0004", b"\\u0005", b"\\u0006", b"\\u0007",
    b"\\b", b"\\t", b"\\n", b"\\u000b", b"\\f", b"\\r", b"\\u000e", b"\\u000f", b"\\u0010",
    b"\\u0011", b"\\u0012", b"\\u0013", b"\\u0014", b"\\u0015", b"\\u0016", b"\\u0017", b"\\u0018",
    b"\\u0019", b"\\u001a", b"\\u001b", b"\\u001c", b"\\u001d", b"\\u001e", b"\\u001f",
];

// This bitset represents which bytes can be copied as-is to a JSON string (0)
// and which ones always need to be escaped (1), whatever the output mode.
// The characters that always need escaping are 0x00 to 0x1F, 0x22 (") and 0x5C (\).
// DEL (0x7F) is deliberately NOT in this set: it is only escaped in ASCII-only
// mode, which `write_json_string` handles itself.
// Non-ASCII unicode characters can be safely included in a JSON string.
static NEEDS_ESCAPING_BITSET: [u64; 4] = [
    //fedcba9876543210_fedcba9876543210_fedcba9876543210_fedcba9876543210
    0b0000000000000000_0000000000000100_1111111111111111_1111111111111111, // 3_2_1_0
    0b0000000000000000_0000000000000000_0001000000000000_0000000000000000, // 7_6_5_4
    0b0000000000000000_0000000000000000_0000000000000000_0000000000000000, // B_A_9_8
    0b0000000000000000_0000000000000000_0000000000000000_0000000000000000, // F_E_D_C
];

/// Returns the escape sequence for byte `c` if it must always be escaped inside
/// a JSON string (`"`, `\`, or a control character below 0x20), or `None` if the
/// byte can be copied verbatim.
#[inline]
fn json_escaped_char(c: u8) -> Option<&'static [u8]> {
    // One bit per byte value: word index is c / 64, bit index is c % 64.
    let bitset_value = NEEDS_ESCAPING_BITSET[usize::from(c / 64)] & (1u64 << (c % 64));
    if bitset_value == 0 {
        None
    } else {
        Some(match c {
            x if x < 0x20 => ESCAPE_CHARS[usize::from(x)],
            b'\\' => &b"\\\\"[..],
            b'"' => &b"\\\""[..],
            // The bitset has no other bits set.
            _ => unreachable!(),
        })
    }
}

/// Writes `s` to `w` as a double-quoted JSON string literal.
///
/// * `ascii_only == false`: only `"`, `\` and the control characters
///   0x00..=0x1F are escaped; everything else (including DEL and all
///   non-ASCII text) is copied through unchanged.
/// * `ascii_only == true`: additionally, DEL and every non-ASCII character are
///   written as `\uXXXX` escapes (lower-case hex). Code points outside the BMP
///   are written as a UTF-16 surrogate pair, i.e. two `\uXXXX` sequences.
///
/// # Errors
///
/// Returns the first error reported by `w`.
pub fn write_json_string<W: io::Write>(s: &str, ascii_only: bool, w: &mut W) -> io::Result<()> {
    w.write_all(b"\"")?;
    // Start of the run of bytes that has been scanned but not yet written;
    // unescaped text is flushed in bulk whenever an escape is needed.
    let mut write_start_idx = 0;
    let bytes = s.as_bytes();
    if ascii_only {
        for (idx, c) in s.char_indices() {
            if c < '\x7f' {
                // ASCII below DEL: escape only if it is in the always-escape set.
                if let Some(escaped) = json_escaped_char(c as u8) {
                    w.write_all(&bytes[write_start_idx..idx])?;
                    w.write_all(escaped)?;
                    write_start_idx = idx + 1;
                }
            } else {
                // DEL or a non-ASCII character: emit as \uXXXX escapes.
                w.write_all(&bytes[write_start_idx..idx])?;
                write_start_idx = idx + c.len_utf8();
                // codepoints outside the BMP get 2 '\uxxxx' sequences to represent them
                let mut units = [0u16; 2];
                for unit in c.encode_utf16(&mut units).iter() {
                    write!(w, "\\u{:04x}", unit)?;
                }
            }
        }
    } else {
        // Every byte that needs escaping is ASCII, so scanning raw bytes is
        // safe: bytes of multi-byte UTF-8 sequences are >= 0x80 and never match.
        for (idx, c) in bytes.iter().copied().enumerate() {
            if let Some(escaped) = json_escaped_char(c) {
                w.write_all(&bytes[write_start_idx..idx])?;
                w.write_all(escaped)?;
                write_start_idx = idx + 1;
            }
        }
    }
    // Flush whatever unescaped tail remains, then close the literal.
    w.write_all(&bytes[write_start_idx..])?;
    w.write_all(b"\"")
}

vm/src/vm.rs

-7
Original file line numberDiff line numberDiff line change
@@ -1483,13 +1483,6 @@ impl VirtualMachine {
14831483
attr_name: impl TryIntoRef<PyString>,
14841484
attr_value: impl Into<PyObjectRef>,
14851485
) -> PyResult<()> {
1486-
// let attr_name = attr_name.try_into_ref(self)?;
1487-
// let value = attr_value.into();
1488-
// let dict = module.dict().expect("module doesn't have dict");
1489-
// if let Ok(module_name) = dict.get_item("__name__", self) {
1490-
// let _ = self.set_attr(&value, "__module__", module_name);
1491-
// }
1492-
// dict.set_item(&attr_name, value, self)?;
14931486
let val = attr_value.into();
14941487
objobject::setattr(module.clone(), attr_name.try_into_ref(self)?, val, self)
14951488
}

0 commit comments

Comments
 (0)