From 2224650fbbd00b65684eb1677aa4077bed0deecb Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Wed, 3 Jun 2020 16:17:19 -0500 Subject: [PATCH 1/4] Impl DictKey for &PyStringRef --- vm/src/dictdatatype.rs | 22 +++++++++++++++++++++- vm/src/obj/objstr.rs | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 41576c8a5b..25b80fa7e9 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -1,4 +1,4 @@ -use crate::obj::objstr::PyString; +use crate::obj::objstr::{PyString, PyStringRef}; use crate::pyhash; use crate::pyobject::{IdProtocol, IntoPyObject, PyObjectRef, PyResult}; use crate::vm::VirtualMachine; @@ -438,6 +438,26 @@ impl DictKey for &PyObjectRef { } } +impl DictKey for &PyStringRef { + fn do_hash(self, _vm: &VirtualMachine) -> PyResult<pyhash::PyHash> { + Ok(self.hash()) + } + + fn do_is(self, other: &PyObjectRef) -> bool { + self.is(other) + } + + fn do_eq(self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> { + if self.is(other_key) { + Ok(true) + } else if let Some(py_str_value) = other_key.payload::<PyString>() { + Ok(py_str_value.as_str() == self.as_str()) + } else { + vm.bool_eq(self.clone().into_object(), other_key.clone()) + } + } +} + /// Implement trait for the str type, so that we can use strings /// to index dictionaries. 
impl DictKey for &str { diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index 4a6c16c989..7bbff6b9b9 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -292,7 +292,7 @@ impl PyString { } #[pymethod(name = "__hash__")] - fn hash(&self) -> pyhash::PyHash { + pub(crate) fn hash(&self) -> pyhash::PyHash { self.hash.load().unwrap_or_else(|| { let hash = pyhash::hash_value(&self.value); self.hash.store(Some(hash)); From c2bbacf19aaf4e3b615040b2493d9541bd8fb723 Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Wed, 3 Jun 2020 16:19:28 -0500 Subject: [PATCH 2/4] Add __name__ and __module__ fields to builtin functions --- vm/src/macros.rs | 14 +++++++++++ vm/src/obj/objbuiltinfunc.rs | 45 ++++++++++++++++++++++++++++++++---- vm/src/pyobject.rs | 39 ++++++++++++++++++++++++------- vm/src/types.rs | 4 ++-- vm/src/vm.rs | 7 ++++++ 5 files changed, 94 insertions(+), 15 deletions(-) diff --git a/vm/src/macros.rs b/vm/src/macros.rs index 3639c853c1..1d63e2105a 100644 --- a/vm/src/macros.rs +++ b/vm/src/macros.rs @@ -328,3 +328,17 @@ macro_rules! class_or_notimplemented { } }; } + +#[macro_export] +macro_rules! named_function { + ($ctx:expr, $module:ident, $func:ident) => {{ + paste::expr! 
{ + $crate::pyobject::PyContext::new_function_named( + &$ctx, + [<$module _ $func>], + stringify!($module).to_owned(), + stringify!($func).to_owned(), + ) + } + }}; +} diff --git a/vm/src/obj/objbuiltinfunc.rs b/vm/src/obj/objbuiltinfunc.rs index 9b91ce0681..deae985f51 100644 --- a/vm/src/obj/objbuiltinfunc.rs +++ b/vm/src/obj/objbuiltinfunc.rs @@ -1,6 +1,7 @@ use std::fmt; use crate::function::{OptionalArg, PyFuncArgs, PyNativeFunc}; +use crate::obj::objstr::PyStringRef; use crate::obj::objtype::PyClassRef; use crate::pyobject::{ IdProtocol, PyClassImpl, PyContext, PyObjectRef, PyResult, PyValue, TypeProtocol, @@ -11,12 +12,15 @@ use crate::vm::VirtualMachine; #[pyclass] pub struct PyBuiltinFunction { value: PyNativeFunc, + module: Option, + name: Option, } impl PyValue for PyBuiltinFunction { fn class(vm: &VirtualMachine) -> PyClassRef { vm.ctx.builtin_function_or_method_type() } + const HAVE_DICT: bool = true; } impl fmt::Debug for PyBuiltinFunction { @@ -27,7 +31,19 @@ impl fmt::Debug for PyBuiltinFunction { impl PyBuiltinFunction { pub fn new(value: PyNativeFunc) -> Self { - Self { value } + Self { + value, + module: None, + name: None, + } + } + + pub fn new_with_name(value: PyNativeFunc, module: PyStringRef, name: PyStringRef) -> Self { + Self { + value, + module: Some(module), + name: Some(name), + } } pub fn as_func(&self) -> &PyNativeFunc { @@ -42,7 +58,16 @@ impl SlotCall for PyBuiltinFunction { } #[pyimpl(with(SlotCall))] -impl PyBuiltinFunction {} +impl PyBuiltinFunction { + #[pyproperty(magic)] + fn module(&self) -> Option { + self.module.clone() + } + #[pyproperty(magic)] + fn name(&self) -> Option { + self.name.clone() + } +} #[pyclass] pub struct PyBuiltinMethod { @@ -64,7 +89,12 @@ impl fmt::Debug for PyBuiltinMethod { impl PyBuiltinMethod { pub fn new(value: PyNativeFunc) -> Self { Self { - function: PyBuiltinFunction { value }, + function: PyBuiltinFunction::new(value), + } + } + pub fn new_with_name(value: PyNativeFunc, module: PyStringRef, 
name: PyStringRef) -> Self { + Self { + function: PyBuiltinFunction::new_with_name(value, module, name), } } @@ -100,9 +130,14 @@ impl SlotCall for PyBuiltinMethod { #[pyimpl(with(SlotDescriptor, SlotCall))] impl PyBuiltinMethod { - // TODO: give builtin functions names #[pyproperty(magic)] - fn name(&self) {} + fn module(&self) -> Option { + self.function.module.clone() + } + #[pyproperty(magic)] + fn name(&self) -> Option { + self.function.name.clone() + } } pub fn init(context: &PyContext) { diff --git a/vm/src/pyobject.rs b/vm/src/pyobject.rs index 1408020888..9de2ed6c3c 100644 --- a/vm/src/pyobject.rs +++ b/vm/src/pyobject.rs @@ -138,7 +138,7 @@ impl PyContext { let exceptions = exceptions::ExceptionZoo::new(&types.type_type, &types.object_type); fn create_object(payload: T, cls: &PyClassRef) -> PyRef { - PyRef::new_ref_unchecked(PyObject::new(payload, cls.clone(), None)) + PyRef::from_obj_unchecked(PyObject::new(payload, cls.clone(), None)) } let none_type = create_type("NoneType", &types.type_type, &types.object_type); @@ -492,6 +492,18 @@ impl PyContext { ) } + pub fn new_function_named(&self, f: F, module: String, name: String) -> PyObjectRef + where + F: IntoPyNativeFunc, + { + let stringref = |s| PyRef::new_ref(objstr::PyString::from(s), self.str_type(), None); + PyObject::new( + PyBuiltinFunction::new_with_name(f.into_func(), stringref(module), stringref(name)), + self.builtin_function_or_method_type(), + None, + ) + } + pub fn new_method(&self, f: F) -> PyObjectRef where F: IntoPyNativeFunc, @@ -695,9 +707,14 @@ impl Clone for PyRef { } impl PyRef { - fn new_ref(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + #[allow(clippy::new_ret_no_self)] + pub fn new_ref(payload: T, typ: PyClassRef, dict: Option) -> Self { + Self::from_obj_unchecked(PyObject::new(payload, typ, dict)) + } + + fn from_obj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { if obj.payload_is::() { - Ok(Self::new_ref_unchecked(obj)) + Ok(Self::from_obj_unchecked(obj)) } else { 
Err(vm.new_runtime_error(format!( "Unexpected payload for type {:?}", @@ -706,7 +723,7 @@ impl PyRef { } } - pub(crate) fn new_ref_unchecked(obj: PyObjectRef) -> Self { + pub(crate) fn from_obj_unchecked(obj: PyObjectRef) -> Self { PyRef { obj, _payload: PhantomData, @@ -747,7 +764,7 @@ where { fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { if objtype::isinstance(&obj, &T::class(vm)) { - PyRef::new_ref(obj, vm) + PyRef::from_obj(obj, vm) } else { let class = T::class(vm); let expected_type = vm.to_pystr(&class)?; @@ -1084,6 +1101,12 @@ impl IntoPyObject for PyRef { } } +impl IntoPyObject for &PyRef { + fn into_pyobject(self, _vm: &VirtualMachine) -> PyResult { + Ok(self.obj.clone()) + } +} + impl IntoPyObject for PyCallable { fn into_pyobject(self, _vm: &VirtualMachine) -> PyResult { Ok(self.into_object()) @@ -1145,7 +1168,7 @@ where where T: PyValue, { - PyRef::new_ref_unchecked(self as PyObjectRef) + PyRef::from_obj_unchecked(self as PyObjectRef) } } @@ -1210,7 +1233,7 @@ pub trait PyValue: fmt::Debug + Send + Sync + Sized + 'static { } else { Some(vm.ctx.new_dict()) }; - PyRef::new_ref(PyObject::new(self, cls, dict), vm) + PyRef::from_obj(PyObject::new(self, cls, dict), vm) } else { let subtype = vm.to_str(&cls.obj)?; let basetype = vm.to_str(&class.obj)?; @@ -1219,7 +1242,7 @@ pub trait PyValue: fmt::Debug + Send + Sync + Sized + 'static { } fn into_ref_with_type_unchecked(self, cls: PyClassRef, dict: Option) -> PyRef { - PyRef::new_ref_unchecked(PyObject::new(self, cls, dict)) + PyRef::from_obj_unchecked(PyObject::new(self, cls, dict)) } } diff --git a/vm/src/types.rs b/vm/src/types.rs index fb43bc092e..32b01ed0ac 100644 --- a/vm/src/types.rs +++ b/vm/src/types.rs @@ -338,8 +338,8 @@ fn init_type_hierarchy() -> (PyClassRef, PyClassRef) { type_type, ); - let type_type = PyClassRef::new_ref_unchecked(Arc::from_raw(type_type_ptr)); - let object_type = PyClassRef::new_ref_unchecked(Arc::from_raw(object_type_ptr)); + let type_type = 
PyClassRef::from_obj_unchecked(Arc::from_raw(type_type_ptr)); + let object_type = PyClassRef::from_obj_unchecked(Arc::from_raw(object_type_ptr)); (*type_type_ptr).payload.mro = vec![object_type.clone()]; (*type_type_ptr).payload.bases = vec![object_type.clone()]; diff --git a/vm/src/vm.rs b/vm/src/vm.rs index f96aedf655..59891fe04c 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -1483,6 +1483,13 @@ impl VirtualMachine { attr_name: impl TryIntoRef, attr_value: impl Into, ) -> PyResult<()> { + // let attr_name = attr_name.try_into_ref(self)?; + // let value = attr_value.into(); + // let dict = module.dict().expect("module doesn't have dict"); + // if let Ok(module_name) = dict.get_item("__name__", self) { + // let _ = self.set_attr(&value, "__module__", module_name); + // } + // dict.set_item(&attr_name, value, self)?; let val = attr_value.into(); objobject::setattr(module.clone(), attr_name.try_into_ref(self)?, val, self) } From 0fb79e1086ba86bc21ca3861c0c5670fae2ee514 Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Wed, 3 Jun 2020 16:20:10 -0500 Subject: [PATCH 3/4] Implement _json.encode_basestring{,_ascii} --- Lib/test/test_json/test_speedups.py | 2 - vm/Cargo.toml | 2 +- vm/src/stdlib/json.rs | 21 +++++++ vm/src/stdlib/json/machinery.rs | 98 +++++++++++++++++++++++++++++ vm/src/vm.rs | 7 --- 5 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 vm/src/stdlib/json/machinery.rs diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 8790b01b38..7e3a559470 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -14,8 +14,6 @@ def test_scanstring(self): self.assertEqual(self.json.decoder.scanstring.__module__, "_json") self.assertIs(self.json.decoder.scanstring, self.json.decoder.c_scanstring) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_encode_basestring_ascii(self): 
self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__, "_json") diff --git a/vm/Cargo.toml b/vm/Cargo.toml index f42f48ef32..7a123c2607 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -120,7 +120,7 @@ libz-sys = "1.0" winreg = "0.7" schannel = "0.1" -[target."cfg(windows)".dependencies.winapi] +[target.'cfg(windows)'.dependencies.winapi] version = "0.3" features = ["winsock2", "handleapi", "ws2def", "std", "winbase", "wincrypt", "fileapi"] diff --git a/vm/src/stdlib/json.rs b/vm/src/stdlib/json.rs index 64f59215b9..57f6f6b380 100644 --- a/vm/src/stdlib/json.rs +++ b/vm/src/stdlib/json.rs @@ -7,6 +7,8 @@ use crate::VirtualMachine; use num_bigint::BigInt; use std::str::FromStr; +mod machinery; + #[pyclass(name = "Scanner")] #[derive(Debug)] struct JsonScanner { @@ -209,11 +211,30 @@ impl JsonScanner { } } +fn encode_string(s: &str, ascii_only: bool) -> String { + let mut buf = Vec::<u8>::with_capacity(s.len() + 2); + machinery::write_json_string(s, ascii_only, &mut buf) + // writing to a vec can't fail + .unwrap_or_else(|_| unsafe { std::hint::unreachable_unchecked() }); + // TODO: verify that the implementation is correct enough to use `from_utf8_unchecked` + String::from_utf8(buf).expect("invalid utf-8 in json output") +} + +fn _json_encode_basestring(s: PyStringRef) -> String { + encode_string(s.as_str(), false) +} + +fn _json_encode_basestring_ascii(s: PyStringRef) -> String { + encode_string(s.as_str(), true) +} + pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { let ctx = &vm.ctx; let scanner_cls = JsonScanner::make_class(ctx); scanner_cls.set_str_attr("__module__", vm.new_str("_json".to_owned())); py_module!(vm, "_json", { "make_scanner" => scanner_cls, + "encode_basestring" => named_function!(ctx, _json, encode_basestring), + "encode_basestring_ascii" => named_function!(ctx, _json, encode_basestring_ascii), }) } diff --git a/vm/src/stdlib/json/machinery.rs b/vm/src/stdlib/json/machinery.rs new file mode 100644 index 0000000000..f02489df6b 
--- /dev/null +++ b/vm/src/stdlib/json/machinery.rs @@ -0,0 +1,98 @@ +// derived from https://github.com/lovasoa/json_in_type + +// BSD 2-Clause License +// +// Copyright (c) 2018, Ophir LOJKINE +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +use std::io; + +static ESCAPE_CHARS: [&[u8]; 0x20] = [ + b"\\u0000", b"\\u0001", b"\\u0002", b"\\u0003", b"\\u0004", b"\\u0005", b"\\u0006", b"\\u0007", + b"\\b", b"\\t", b"\\n", b"\\u000b", b"\\f", b"\\r", b"\\u000e", b"\\u000f", b"\\u0010", + b"\\u0011", b"\\u0012", b"\\u0013", b"\\u0014", b"\\u0015", b"\\u0016", b"\\u0017", b"\\u0018", + b"\\u0019", b"\\u001a", b"\\u001b", b"\\u001c", b"\\u001d", b"\\u001e", b"\\u001f", +]; + +// This bitset represents which bytes can be copied as-is to a JSON string (0) +// And which one need to be escaped (1) +// The characters that need escaping are 0x00 to 0x1F, 0x22 ("), 0x5C (\), 0x7F (DEL) +// Non-ASCII unicode characters can be safely included in a JSON string +static NEEDS_ESCAPING_BITSET: [u64; 4] = [ + //fedcba9876543210_fedcba9876543210_fedcba9876543210_fedcba9876543210 + 0b0000000000000000_0000000000000100_1111111111111111_1111111111111111, // 3_2_1_0 + 0b1000000000000000_0000000000000000_0001000000000000_0000000000000000, // 7_6_5_4 + 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000, // B_A_9_8 + 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000, // F_E_D_C +]; + +#[inline(always)] +fn json_escaped_char(c: u8) -> Option<&'static [u8]> { + let bitset_value = NEEDS_ESCAPING_BITSET[(c / 64) as usize] & (1 << (c % 64)); + if bitset_value == 0 { + None + } else { + Some(match c { + x if x < 0x20 => ESCAPE_CHARS[c as usize], + b'\\' => &b"\\\\"[..], + b'\"' => &b"\\\""[..], + 0x7F => &b"\\u007f"[..], + _ => unreachable!(), + }) + } +} + +pub fn write_json_string<W: io::Write>(s: &str, ascii_only: bool, w: &mut W) -> io::Result<()> { + w.write_all(b"\"")?; + let mut write_start_idx = 0; + let bytes = s.as_bytes(); + if ascii_only { + for (idx, c) in s.char_indices() { + if c.is_ascii() { + if let Some(escaped) = json_escaped_char(c as u8) { + w.write_all(&bytes[write_start_idx..idx])?; + w.write_all(escaped)?; + write_start_idx = idx + 1; + } + } else { + 
w.write_all(&bytes[write_start_idx..idx])?; + write_start_idx = idx + c.len_utf8(); + // codepoints outside the BMP get 2 '\uxxxx' sequences to represent them + for point in c.encode_utf16(&mut [0; 2]) { + write!(w, "\\u{:04x}", point)?; + } + } + } + } else { + for (idx, c) in s.bytes().enumerate() { + if let Some(escaped) = json_escaped_char(c) { + w.write_all(&bytes[write_start_idx..idx])?; + w.write_all(escaped)?; + write_start_idx = idx + 1; + } + } + } + w.write_all(&bytes[write_start_idx..])?; + w.write_all(b"\"") +} diff --git a/vm/src/vm.rs b/vm/src/vm.rs index 59891fe04c..f96aedf655 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -1483,13 +1483,6 @@ impl VirtualMachine { attr_name: impl TryIntoRef, attr_value: impl Into, ) -> PyResult<()> { - // let attr_name = attr_name.try_into_ref(self)?; - // let value = attr_value.into(); - // let dict = module.dict().expect("module doesn't have dict"); - // if let Ok(module_name) = dict.get_item("__name__", self) { - // let _ = self.set_attr(&value, "__module__", module_name); - // } - // dict.set_item(&attr_name, value, self)?; let val = attr_value.into(); objobject::setattr(module.clone(), attr_name.try_into_ref(self)?, val, self) } From 74f261a22cbeae126f24444f187af9560decc5d9 Mon Sep 17 00:00:00 2001 From: Noah <33094578+coolreader18@users.noreply.github.com> Date: Sat, 6 Jun 2020 15:37:47 -0500 Subject: [PATCH 4/4] Fix Rust 1.44 clippy warnings --- compiler/src/compile.rs | 2 +- vm/src/obj/objtype.rs | 1 + vm/src/pyobject.rs | 2 +- vm/src/scope.rs | 1 - 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/src/compile.rs b/compiler/src/compile.rs index b7e2f0a323..7303e43e27 100644 --- a/compiler/src/compile.rs +++ b/compiler/src/compile.rs @@ -591,7 +591,7 @@ impl Compiler { { return Err(self.error_loc( CompileErrorType::AsyncReturnValue, - statement.location.clone(), + statement.location, )); } self.compile_expression(v)?; diff --git a/vm/src/obj/objtype.rs b/vm/src/obj/objtype.rs index 
85b2b4a3ff..f6debf5f94 100644 --- a/vm/src/obj/objtype.rs +++ b/vm/src/obj/objtype.rs @@ -274,6 +274,7 @@ impl PyClassRef { // Search the bases for the proper metatype to deal with this: let winner = calculate_meta_class(metatype.clone(), &bases, vm)?; let metatype = if !winner.is(&metatype) { + #[allow(clippy::redundant_clone)] // false positive if let Some(ref tp_new) = winner.clone().slots.read().unwrap().new { // Pass it to the winner diff --git a/vm/src/pyobject.rs b/vm/src/pyobject.rs index 9de2ed6c3c..5d43fdf5cf 100644 --- a/vm/src/pyobject.rs +++ b/vm/src/pyobject.rs @@ -599,7 +599,7 @@ impl PyContext { bytecode::Constant::Complex { ref value } => self.new_complex(*value), bytecode::Constant::String { ref value } => self.new_str(value.clone()), bytecode::Constant::Bytes { ref value } => self.new_bytes(value.clone()), - bytecode::Constant::Boolean { ref value } => self.new_bool(value.clone()), + bytecode::Constant::Boolean { value } => self.new_bool(value), bytecode::Constant::Code { ref code } => { self.new_code_object(*code.clone()).into_object() } diff --git a/vm/src/scope.rs b/vm/src/scope.rs index bbba87a30d..e7308994ec 100644 --- a/vm/src/scope.rs +++ b/vm/src/scope.rs @@ -39,7 +39,6 @@ impl Scope { ) -> Scope { if !globals.contains_key("__builtins__", vm) { globals - .clone() .set_item("__builtins__", vm.builtins.clone(), vm) .unwrap(); }