From 33fccbe2597aa8509f143208fdfb7ef5856b4456 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 21 Mar 2023 01:23:02 +0900 Subject: [PATCH 1/3] leaked pystr and doc memory usage optimization --- vm/src/builtins/str.rs | 32 ++++++++++++++++++++++++++++++++ vm/src/builtins/super.rs | 18 ------------------ vm/src/class.rs | 9 ++++++--- vm/src/stdlib/sys.rs | 4 ++-- vm/src/vm/context.rs | 4 ++++ 5 files changed, 44 insertions(+), 23 deletions(-) diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 8aafc63c3b..542132ca76 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -2221,6 +2221,38 @@ impl PyStrInterned { pub fn to_exact(&'static self) -> PyRefExact { unsafe { PyRefExact::new_unchecked(self.to_owned()) } } + + pub(crate) fn new_static_str(s: &'static str, ctx: &Context) -> &'static PyStrInterned { + if let Some(interned) = ctx.interned_str(s) { + return interned; + } + + struct StaticStrData { + _bytes: &'static [u8], + _kind: StrKind, + _len: usize, + } + struct PyStrView { + _data: StaticStrData, + _hash: PyAtomic, + } + let kind = s.str_kind(); + let fake_payload = PyStrView { + _data: StaticStrData { + _bytes: s.as_bytes(), + _kind: kind, + _len: s.len(), + }, + _hash: Radium::new(hash::SENTINEL), + }; + let payload: PyStr = unsafe { + // Safety: + // the layout of Box<[u8]> == &<[u8]> + // and Box::drop will never be called by leaking + std::mem::transmute(fake_payload) + }; + ctx.intern_str(payload.into_exact_ref(ctx)) + } } impl std::fmt::Display for PyStrInterned { diff --git a/vm/src/builtins/super.rs b/vm/src/builtins/super.rs index 5f363ebea5..7a1f9f510b 100644 --- a/vm/src/builtins/super.rs +++ b/vm/src/builtins/super.rs @@ -258,22 +258,4 @@ fn supercheck(ty: PyTypeRef, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult< pub fn init(context: &Context) { let super_type = &context.types.super_type; PySuper::extend_class(context, super_type); - - let super_doc = "super() -> same as super(__class__, )\n\ - super(type) -> unbound super object\n\ - super(type, obj) -> bound super object; requires isinstance(obj, type)\n\ - super(type, type2) -> bound super object; requires issubclass(type2, type)\n\ - Typical use to call a cooperative superclass method:\n\ - class C(B):\n \ - def meth(self, arg):\n \ - super().meth(arg)\n\ - This works for class methods too:\n\ - class C(B):\n \ - @classmethod\n \ - def cmeth(cls, arg):\n \ - super().cmeth(arg)\n"; - - extend_class!(context, super_type, { - "__doc__" => context.new_str(super_doc), - }); } diff --git a/vm/src/class.rs b/vm/src/class.rs index bc38d6bd61..306a40f59e 100644 --- a/vm/src/class.rs +++ b/vm/src/class.rs @@ -79,7 +79,7 @@ pub trait PyClassImpl: PyClassDef { assert!(class.slots.flags.is_created_with_flags()); } - let _ = ctx.intern_str(Self::NAME); // intern type name + let _ = ctx.intern_static_str(Self::NAME); // intern type name if Self::TP_FLAGS.has_feature(PyTypeFlags::HAS_DICT) { let __dict__ = identifier!(ctx, __dict__); @@ -96,12 +96,15 @@ pub trait PyClassImpl: PyClassDef { } Self::impl_extend_class(ctx, class); if let Some(doc) = Self::DOC { - class.set_attr(identifier!(ctx, __doc__), ctx.new_str(doc).into()); + class.set_attr( + identifier!(ctx, __doc__), + ctx.intern_static_str(doc).to_owned().into(), + ); } if let Some(module_name) = Self::MODULE_NAME { class.set_attr( identifier!(ctx, __module__), - ctx.new_str(module_name).into(), + ctx.intern_static_str(module_name).to_owned().into(), ); } diff --git a/vm/src/stdlib/sys.rs b/vm/src/stdlib/sys.rs index fdfe2faf69..2b6ed77d0e 100644 --- a/vm/src/stdlib/sys.rs +++ b/vm/src/stdlib/sys.rs @@ -1,4 +1,4 @@ -use crate::{Py, PyResult, VirtualMachine, builtins::PyModule, convert::ToPyObject}; +use crate::{Py, PyResult, VirtualMachine, builtins::PyModule, convert::IntoObject}; pub(crate) use sys::{__module_def, DOC, MAXSIZE, MULTIARCH, UnraisableHookArgs}; @@ -1099,7 +1099,7 @@ pub(crate) fn init_module(vm: &VirtualMachine, module: &Py, builtins: .set_item("builtins", builtins.to_owned().into(), vm) .unwrap(); extend_module!(vm, module, { - "__doc__" => sys::DOC.to_owned().to_pyobject(vm), + "__doc__" => vm.ctx.intern_static_str(sys::DOC.unwrap()).to_owned().into_object(), "modules" => modules, }); } diff --git a/vm/src/vm/context.rs b/vm/src/vm/context.rs index a61484e6bc..a096e4d61e 100644 --- a/vm/src/vm/context.rs +++ b/vm/src/vm/context.rs @@ -338,6 +338,10 @@ impl Context { unsafe { self.string_pool.intern(s, self.types.str_type.to_owned()) } } + pub(crate) fn intern_static_str(&self, s: &'static str) -> &'static PyStrInterned { + PyStrInterned::new_static_str(s, self) + } + pub fn interned_str( &self, s: &S, From b4706df1fc50def413cc7398c29cef5cca06a1d1 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 21 Mar 2023 00:51:00 +0900 Subject: [PATCH 2/3] additional intern_static_str --- derive-impl/src/pymodule.rs | 2 +- vm/src/builtins/dict.rs | 6 +++--- vm/src/builtins/frame.rs | 2 +- vm/src/builtins/str.rs | 2 +- vm/src/warn.rs | 6 +++++- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/derive-impl/src/pymodule.rs b/derive-impl/src/pymodule.rs index eb1e4bba9e..e195d0dac5 100644 --- a/derive-impl/src/pymodule.rs +++ b/derive-impl/src/pymodule.rs @@ -571,7 +571,7 @@ impl ModuleItem for ClassItem { let set_attr = match py_names.len() { 0 => quote! { let _ = new_class; // suppress warning - let _ = vm.ctx.intern_str(#class_name); + let _ = vm.ctx.intern_str(#class_name); // TODO: intern_static_str }, 1 => { let py_name = &py_names[0]; diff --git a/vm/src/builtins/dict.rs b/vm/src/builtins/dict.rs index a19b11fcfb..6147adffc7 100644 --- a/vm/src/builtins/dict.rs +++ b/vm/src/builtins/dict.rs @@ -68,7 +68,7 @@ impl PyDict { Err(other) => other, }; let dict = &self.entries; - if let Some(keys) = vm.get_method(other.clone(), vm.ctx.intern_str("keys")) { + if let Some(keys) = vm.get_method(other.clone(), vm.ctx.interned_str("keys").unwrap()) { let keys = keys?.call((), vm)?.get_iter(vm)?; while let PyIterReturn::Return(key) = keys.next(vm)? { let val = other.get_item(&*key, vm)?; @@ -509,7 +509,7 @@ impl Representable for PyDict { vm.ctx.new_str(format!("{{{}}}", str_parts.join(", "))) } else { - vm.ctx.intern_str("{...}").to_owned() + vm.ctx.intern_static_str("{...}").to_owned() }; Ok(s) } @@ -797,7 +797,7 @@ macro_rules! dict_view { vm.ctx .new_str(format!("{}([{}])", Self::NAME, str_parts.join(", "))) } else { - vm.ctx.intern_str("{...}").to_owned() + vm.ctx.intern_static_str("{...}").to_owned() }; Ok(s) } diff --git a/vm/src/builtins/frame.rs b/vm/src/builtins/frame.rs index 1b73850190..370039b326 100644 --- a/vm/src/builtins/frame.rs +++ b/vm/src/builtins/frame.rs @@ -22,7 +22,7 @@ impl Representable for Frame { #[inline] fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { const REPR: &str = ""; - Ok(vm.ctx.intern_str(REPR).to_owned()) + Ok(vm.ctx.intern_static_str(REPR).to_owned()) } #[cold] diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 542132ca76..addee45e91 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -251,7 +251,7 @@ impl<'a> AsPyStr<'a> for &'a PyStrRef { impl AsPyStr<'static> for &'static str { #[inline] fn as_pystr(self, ctx: &Context) -> &'static Py { - ctx.intern_str(self) + ctx.intern_static_str(self) } } diff --git a/vm/src/warn.rs b/vm/src/warn.rs index b2055225a0..3da67a5b5a 100644 --- a/vm/src/warn.rs +++ b/vm/src/warn.rs @@ -397,7 +397,11 @@ fn setup_context( let (globals, filename, lineno) = if let Some(f) = f { (f.globals.clone(), f.code.source_path, f.f_lineno()) } else { - (vm.current_globals().clone(), vm.ctx.intern_str("sys"), 1) + ( + vm.current_globals().clone(), + vm.ctx.interned_str("sys").unwrap(), + 1, + ) }; let registry = if let Ok(registry) = globals.get_item(__warningregistry__, vm) { From d1817ac88c5ade617bd672dbba75d000fc8c67a2 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 21 Mar 2023 01:04:14 +0900 Subject: [PATCH 3/3] more intern_static_str adaption --- vm/src/builtins/tuple.rs | 4 ++-- vm/src/builtins/type.rs | 4 ++-- vm/src/stdlib/builtins.rs | 2 +- vm/src/stdlib/errno.rs | 2 +- vm/src/stdlib/sys.rs | 2 +- vm/src/vm/mod.rs | 22 +++++++++++++++------- 6 files changed, 22 insertions(+), 14 deletions(-) diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 1b6e281657..36779a535b 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -430,7 +430,7 @@ impl Representable for PyTuple { #[inline] fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { let s = if zelf.is_empty() { - vm.ctx.intern_str("()").to_owned() + vm.ctx.intern_static_str("()").to_owned() } else if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) { let s = if zelf.len() == 1 { format!("({},)", zelf.elements[0].repr(vm)?) @@ -439,7 +439,7 @@ impl Representable for PyTuple { }; vm.ctx.new_str(s) } else { - vm.ctx.intern_str("(...)").to_owned() + vm.ctx.intern_static_str("(...)").to_owned() }; Ok(s) } diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index 776c777cb3..f51f11678e 100644 --- a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -323,12 +323,12 @@ impl PyType { // This is used for class initialisation where the vm is not yet available. pub fn set_str_attr>( &self, - attr_name: &str, + attr_name: &'static str, value: V, ctx: impl AsRef, ) { let ctx = ctx.as_ref(); - let attr_name = ctx.intern_str(attr_name); + let attr_name = ctx.intern_static_str(attr_name); self.set_attr(attr_name, value.into()) } diff --git a/vm/src/stdlib/builtins.rs b/vm/src/stdlib/builtins.rs index f778d02ba3..0635c6cd13 100644 --- a/vm/src/stdlib/builtins.rs +++ b/vm/src/stdlib/builtins.rs @@ -406,7 +406,7 @@ mod builtins { fn breakpoint(args: FuncArgs, vm: &VirtualMachine) -> PyResult { match vm .sys_module - .get_attr(vm.ctx.intern_str("breakpointhook"), vm) + .get_attr(vm.ctx.intern_static_str("breakpointhook"), vm) { Ok(hook) => hook.as_ref().call(args, vm), Err(_) => Err(vm.new_runtime_error("lost sys.breakpointhook".to_owned())), diff --git a/vm/src/stdlib/errno.rs b/vm/src/stdlib/errno.rs index c77fcbfefc..40f49b8aed 100644 --- a/vm/src/stdlib/errno.rs +++ b/vm/src/stdlib/errno.rs @@ -12,7 +12,7 @@ pub fn make_module(vm: &VirtualMachine) -> PyRef { "errorcode" => errorcode.clone(), }); for (name, code) in ERROR_CODES { - let name = vm.ctx.intern_str(*name); + let name = vm.ctx.intern_static_str(name); let code = vm.new_pyobj(*code); errorcode .set_item(&*code, name.to_owned().into(), vm) diff --git a/vm/src/stdlib/sys.rs b/vm/src/stdlib/sys.rs index 2b6ed77d0e..938c0436ae 100644 --- a/vm/src/stdlib/sys.rs +++ b/vm/src/stdlib/sys.rs @@ -149,7 +149,7 @@ mod sys { fn byteorder(vm: &VirtualMachine) -> PyStrRef { // https://doc.rust-lang.org/reference/conditional-compilation.html#target_endian vm.ctx - .intern_str(if cfg!(target_endian = "little") { + .intern_static_str(if cfg!(target_endian = "little") { "little" } else if cfg!(target_endian = "big") { "big" diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 67b160726b..af9206a526 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -206,12 +206,20 @@ impl VirtualMachine { vm.builtins.init_dict( vm.ctx.intern_str("builtins"), - Some(vm.ctx.intern_str(stdlib::builtins::DOC.unwrap()).to_owned()), + Some( + vm.ctx + .intern_static_str(stdlib::builtins::DOC.unwrap()) + .to_owned(), + ), &vm, ); vm.sys_module.init_dict( vm.ctx.intern_str("sys"), - Some(vm.ctx.intern_str(stdlib::sys::DOC.unwrap()).to_owned()), + Some( + vm.ctx + .intern_static_str(stdlib::sys::DOC.unwrap()) + .to_owned(), + ), &vm, ); // let name = vm.sys_module.get_attr("__name__", &vm).unwrap(); @@ -300,7 +308,7 @@ impl VirtualMachine { #[cfg(any(not(target_arch = "wasm32"), target_os = "wasi"))] { let io = import::import_builtin(self, "_io")?; - let set_stdio = |name, fd, write| { + let set_stdio = |name, dunder_name, fd, write| { let buffered_stdio = self.state.settings.buffered_stdio; let unbuffered = write && !buffered_stdio; let buf = crate::stdlib::io::open( @@ -332,7 +340,7 @@ impl VirtualMachine { let mode = if write { "w" } else { "r" }; stdio.set_attr("mode", self.ctx.new_str(mode), self)?; - let dunder_name = self.ctx.intern_str(format!("__{name}__")); + let dunder_name = self.ctx.intern_static_str(dunder_name); self.sys_module.set_attr( dunder_name, // e.g. __stdin__ stdio.clone(), @@ -341,9 +349,9 @@ impl VirtualMachine { self.sys_module.set_attr(name, stdio, self)?; Ok(()) }; - set_stdio("stdin", 0, false)?; - set_stdio("stdout", 1, true)?; - set_stdio("stderr", 2, true)?; + set_stdio("stdin", "__stdin__", 0, false)?; + set_stdio("stdout", "__stdout__", 1, true)?; + set_stdio("stderr", "__stderr__", 2, true)?; let io_open = io.get_attr("open", self)?; self.builtins.set_attr("open", io_open, self)?;