Skip to content

Commit b30ca99

Browse files
committed
Implement string interning
1 parent f390f61 commit b30ca99

File tree

18 files changed

+266
-114
lines changed

18 files changed

+266
-114
lines changed

Lib/test/test_pathlib.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,8 +2044,6 @@ def test_pickling_common(self):
20442044
pp = pickle.loads(dumped)
20452045
self.assertEqual(pp.stat(), p.stat())
20462046

2047-
# TODO: RUSTPYTHON
2048-
@unittest.expectedFailure
20492047
def test_parts_interning(self):
20502048
P = self.cls
20512049
p = P('/usr/bin/foo')

Lib/test/test_sys.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,6 @@ def test_43581(self):
539539
# the test runs under regrtest.
540540
self.assertEqual(sys.__stdout__.encoding, sys.__stderr__.encoding)
541541

542-
@unittest.skip("TODO: RUSTPYTHON; sys.intern() string interning")
543542
def test_intern(self):
544543
global INTERN_NUMRUNS
545544
INTERN_NUMRUNS += 1

examples/freeze/main.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ fn run(vm: &vm::VirtualMachine) -> vm::pyobject::PyResult<()> {
1515
vm::py_freeze!(file = "examples/freeze/freeze.py");
1616

1717
let res = vm.run_code_obj(
18-
vm.ctx
19-
.new_code_object(modules.get("frozen").unwrap().code.clone()),
18+
vm.new_code_object(modules.get("frozen").unwrap().code.clone()),
2019
scope,
2120
);
2221

examples/mini_repl.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ macro_rules! add_python_function {
1717
// compile the code to bytecode
1818
let code = vm::py_compile!(source = $src);
1919
// convert the rustpython_bytecode::CodeObject to a PyCodeRef
20-
let code = $vm.ctx.new_code_object(code);
20+
let code = $vm.new_code_object(code);
2121

2222
// run the python code in the scope to store the function
2323
$vm.run_code_obj(code, $scope.clone())

vm/src/builtins/code.rs

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,59 @@ impl Constant for PyConstant {
7474
}
7575
}
7676

77-
pub(crate) struct PyObjBag<'a>(pub &'a PyContext);
77+
pub(crate) struct PyObjBag<'a>(pub &'a VirtualMachine);
7878

7979
impl ConstantBag for PyObjBag<'_> {
8080
type Constant = PyConstant;
8181
fn make_constant(&self, constant: bytecode::ConstantData) -> Self::Constant {
82-
PyConstant(self.0.unwrap_constant(constant))
82+
let vm = self.0;
83+
let ctx = &vm.ctx;
84+
let obj = match constant {
85+
bytecode::ConstantData::Integer { value } => ctx.new_int(value),
86+
bytecode::ConstantData::Float { value } => ctx.new_float(value),
87+
bytecode::ConstantData::Complex { value } => ctx.new_complex(value),
88+
bytecode::ConstantData::Str { value } => vm.intern_string(value).into_object(),
89+
bytecode::ConstantData::Bytes { value } => ctx.new_bytes(value.to_vec()),
90+
bytecode::ConstantData::Boolean { value } => ctx.new_bool(value),
91+
bytecode::ConstantData::Code { code } => {
92+
ctx.new_code_object(code.map_bag(self)).into_object()
93+
}
94+
bytecode::ConstantData::Tuple { elements } => {
95+
let elements = elements
96+
.into_iter()
97+
.map(|constant| self.make_constant(constant).0)
98+
.collect();
99+
ctx.new_tuple(elements)
100+
}
101+
bytecode::ConstantData::None => ctx.none(),
102+
bytecode::ConstantData::Ellipsis => ctx.ellipsis(),
103+
};
104+
PyConstant(obj)
83105
}
84106
fn make_constant_borrowed<C: Constant>(&self, constant: BorrowedConstant<C>) -> Self::Constant {
85-
// TODO: check if the constant is a string and try interning it without cloning
86-
self.make_constant(constant.into_data())
107+
let vm = self.0;
108+
let ctx = &vm.ctx;
109+
let obj = match constant {
110+
bytecode::BorrowedConstant::Integer { value } => ctx.new_bigint(value),
111+
bytecode::BorrowedConstant::Float { value } => ctx.new_float(value),
112+
bytecode::BorrowedConstant::Complex { value } => ctx.new_complex(value),
113+
bytecode::BorrowedConstant::Str { value } => vm.intern_string(value).into_object(),
114+
bytecode::BorrowedConstant::Bytes { value } => ctx.new_bytes(value.to_vec()),
115+
bytecode::BorrowedConstant::Boolean { value } => ctx.new_bool(value),
116+
bytecode::BorrowedConstant::Code { code } => {
117+
ctx.new_code_object(code.map_clone_bag(self)).into_object()
118+
}
119+
bytecode::BorrowedConstant::Tuple { elements } => {
120+
let elements = elements
121+
.into_iter()
122+
.map(|constant| self.make_constant_borrowed(constant).0)
123+
.collect();
124+
ctx.new_tuple(elements)
125+
}
126+
bytecode::BorrowedConstant::None => ctx.none(),
127+
bytecode::BorrowedConstant::Ellipsis => ctx.ellipsis(),
128+
};
129+
PyConstant(obj)
87130
}
88131
}
89132

@@ -93,16 +136,16 @@ pub type CodeObject = bytecode::CodeObject<PyConstant>;
93136
pub type FrozenModule = bytecode::FrozenModule<PyConstant>;
94137

95138
pub trait IntoCodeObject {
96-
fn into_codeobj(self, ctx: &PyContext) -> CodeObject;
139+
fn into_codeobj(self, vm: &VirtualMachine) -> CodeObject;
97140
}
98141
impl IntoCodeObject for CodeObject {
99-
fn into_codeobj(self, _ctx: &PyContext) -> CodeObject {
142+
fn into_codeobj(self, _vm: &VirtualMachine) -> CodeObject {
100143
self
101144
}
102145
}
103146
impl IntoCodeObject for bytecode::CodeObject {
104-
fn into_codeobj(self, ctx: &PyContext) -> CodeObject {
105-
ctx.map_codeobj(self)
147+
fn into_codeobj(self, vm: &VirtualMachine) -> CodeObject {
148+
vm.map_codeobj(self)
106149
}
107150
}
108151

vm/src/builtins/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ pub use property::PyProperty;
5353
pub(crate) mod pybool;
5454
pub use pybool::{IntoPyBool, PyBool};
5555
pub(crate) mod pystr;
56-
pub use pystr::{PyStr, PyStrRef};
56+
pub use pystr::{PyStr, PyStrExact, PyStrRef};
5757
pub(crate) mod pysuper;
5858
pub use pysuper::PySuper;
5959
pub(crate) mod pytype;

vm/src/builtins/pystr.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,3 +1451,34 @@ impl<'s> AnyStr<'s, char> for str {
14511451
splited
14521452
}
14531453
}
1454+
1455+
#[repr(transparent)]
1456+
pub struct PyStrExact(PyStrRef);
1457+
impl PyStrExact {
1458+
pub fn from_str(s: PyStrRef, ctx: &PyContext) -> Result<Self, PyStrRef> {
1459+
if s.class().is(&ctx.types.str_type) {
1460+
Ok(Self(s))
1461+
} else {
1462+
Err(s)
1463+
}
1464+
}
1465+
pub fn into_pystr(self) -> PyStrRef {
1466+
self.0
1467+
}
1468+
}
1469+
impl IntoPyObject for PyStrExact {
1470+
fn into_pyobject(self, _vm: &VirtualMachine) -> PyObjectRef {
1471+
self.0.into_object()
1472+
}
1473+
}
1474+
impl crate::dictdatatype::DictKey for PyStrExact {
1475+
fn key_hash(&self, vm: &VirtualMachine) -> PyResult<hash::PyHash> {
1476+
self.0.key_hash(vm)
1477+
}
1478+
fn key_is(&self, other: &PyObjectRef) -> bool {
1479+
self.0.key_is(other)
1480+
}
1481+
fn key_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
1482+
self.0.key_eq(vm, other_key)
1483+
}
1484+
}

vm/src/dictdatatype.rs

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ use crate::builtins::pystr::{PyStr, PyStrRef};
44
/// And: https://www.youtube.com/watch?v=p33CVV29OG8
55
/// And: http://code.activestate.com/recipes/578375/
66
use crate::common::lock::{PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard};
7-
use crate::pyobject::{BorrowValue, IdProtocol, IntoPyObject, PyObjectRef, PyResult};
7+
use crate::pyobject::{BorrowValue, IdProtocol, IntoPyObject, PyObjectRef, PyResult, TypeProtocol};
88
use crate::vm::VirtualMachine;
99
use rustpython_common::hash;
10+
use std::fmt;
1011
use std::mem::size_of;
1112

1213
// HashIndex is intended to be same size with hash::PyHash
@@ -24,6 +25,11 @@ type EntryIndex = usize;
2425
pub struct Dict<T = PyObjectRef> {
2526
inner: PyRwLock<DictInner<T>>,
2627
}
28+
impl<T> fmt::Debug for Dict<T> {
29+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
30+
f.debug_struct("Debug").finish()
31+
}
32+
}
2733

2834
#[derive(Debug, Copy, Clone)]
2935
enum IndexEntry {
@@ -336,8 +342,9 @@ impl<T: Clone> Dict<T> {
336342
Ok(())
337343
}
338344

339-
pub fn setdefault<F>(&self, vm: &VirtualMachine, key: PyObjectRef, default: F) -> PyResult<T>
345+
pub fn setdefault<K, F>(&self, vm: &VirtualMachine, key: K, default: F) -> PyResult<T>
340346
where
347+
K: DictKey,
341348
F: FnOnce() -> T,
342349
{
343350
let hash = key.key_hash(vm)?;
@@ -359,13 +366,57 @@ impl<T: Clone> Dict<T> {
359366
} else {
360367
let value = default();
361368
let mut inner = self.borrow_value_mut();
362-
inner.unchecked_push(index_index, hash, key, value.clone(), entry);
369+
inner.unchecked_push(
370+
index_index,
371+
hash,
372+
key.into_pyobject(vm),
373+
value.clone(),
374+
entry,
375+
);
363376
break value;
364377
}
365378
};
366379
Ok(res)
367380
}
368381

382+
pub fn setdefault_entry<K, F>(
383+
&self,
384+
vm: &VirtualMachine,
385+
key: K,
386+
default: F,
387+
) -> PyResult<(PyObjectRef, T)>
388+
where
389+
K: DictKey,
390+
F: FnOnce() -> T,
391+
{
392+
let hash = key.key_hash(vm)?;
393+
let res = loop {
394+
let lookup = self.lookup(vm, &key, hash, None)?;
395+
let (entry, index_index) = lookup;
396+
if let IndexEntry::Index(index) = entry {
397+
let inner = self.borrow_value();
398+
if let Some(entry) = inner.entries.get(index) {
399+
if entry.index == index_index {
400+
break (entry.key.clone(), entry.value.clone());
401+
} else {
402+
// stuff shifted around, let's try again
403+
}
404+
} else {
405+
// The dict was changed since we did lookup, let's try again.
406+
continue;
407+
}
408+
} else {
409+
let value = default();
410+
let key = key.into_pyobject(vm);
411+
let mut inner = self.borrow_value_mut();
412+
let ret = (key.clone(), value.clone());
413+
inner.unchecked_push(index_index, hash, key, value, entry);
414+
break ret;
415+
}
416+
};
417+
Ok(res)
418+
}
419+
369420
pub fn len(&self) -> usize {
370421
self.borrow_value().used
371422
}
@@ -561,8 +612,8 @@ impl DictKey for PyStrRef {
561612
fn key_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
562613
if self.is(other_key) {
563614
Ok(true)
564-
} else if let Some(py_str_value) = other_key.payload::<PyStr>() {
565-
Ok(py_str_value.borrow_value() == self.borrow_value())
615+
} else if let Some(pystr) = str_exact(other_key, vm) {
616+
Ok(pystr.borrow_value() == self.borrow_value())
566617
} else {
567618
vm.bool_eq(self.as_object(), other_key)
568619
}
@@ -586,8 +637,8 @@ impl DictKey for &str {
586637
}
587638

588639
fn key_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
589-
if let Some(py_str_value) = other_key.payload::<PyStr>() {
590-
Ok(py_str_value.borrow_value() == *self)
640+
if let Some(pystr) = str_exact(other_key, vm) {
641+
Ok(pystr.borrow_value() == *self)
591642
} else {
592643
// Fall back to PyObjectRef implementation.
593644
let s = vm.ctx.new_str(*self);
@@ -596,6 +647,28 @@ impl DictKey for &str {
596647
}
597648
}
598649

650+
impl DictKey for String {
651+
fn key_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue> {
652+
self.as_str().key_hash(vm)
653+
}
654+
655+
fn key_is(&self, other: &PyObjectRef) -> bool {
656+
self.as_str().key_is(other)
657+
}
658+
659+
fn key_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
660+
self.as_str().key_eq(vm, other_key)
661+
}
662+
}
663+
664+
/// Returns the `PyStr` payload of `obj` only when the object's class is
/// identical to `vm.ctx.types.str_type`; any other class yields `None`
/// without inspecting the payload.
fn str_exact<'a>(obj: &'a PyObjectRef, vm: &VirtualMachine) -> Option<&'a PyStr> {
    if !obj.class().is(&vm.ctx.types.str_type) {
        return None;
    }
    obj.payload::<PyStr>()
}
671+
599672
#[cfg(test)]
600673
mod tests {
601674
use super::{Dict, DictKey};

vm/src/frame.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,11 @@ impl ExecutingFrame<'_> {
255255
flame_guard!(format!("Frame::run({})", self.code.obj_name));
256256
// Execute until return or exception:
257257
loop {
258-
let loc = self.current_location();
259-
let result = self.execute_instruction(vm);
258+
let idx = self.lasti.fetch_add(1, Ordering::Relaxed);
259+
let loc = self.code.locations[idx];
260+
let instr = &self.code.instructions[idx];
261+
vm.check_signals()?;
262+
let result = self.execute_instruction(instr, vm);
260263
match result {
261264
Ok(None) => {}
262265
Ok(Some(value)) => {
@@ -329,11 +332,12 @@ impl ExecutingFrame<'_> {
329332
}
330333

331334
/// Execute a single instruction.
332-
fn execute_instruction(&mut self, vm: &VirtualMachine) -> FrameResult {
333-
vm.check_signals()?;
334-
335-
let instruction = &self.code.instructions[self.lasti.fetch_add(1, Ordering::Relaxed)];
336-
335+
#[inline(always)]
336+
fn execute_instruction(
337+
&mut self,
338+
instruction: &bytecode::Instruction,
339+
vm: &VirtualMachine,
340+
) -> FrameResult {
337341
flame_guard!(format!("Frame::execute_instruction({:?})", instruction));
338342

339343
#[cfg(feature = "vm-tracing-logging")]
@@ -1465,10 +1469,6 @@ impl ExecutingFrame<'_> {
14651469
self.lasti.load(Ordering::Relaxed)
14661470
}
14671471

1468-
fn current_location(&self) -> bytecode::Location {
1469-
self.code.locations[self.lasti()]
1470-
}
1471-
14721472
fn push_block(&mut self, typ: BlockType) {
14731473
self.state.blocks.push(Block {
14741474
typ,

vm/src/frozen.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,30 @@
1-
use super::pyobject::PyContext;
21
use crate::builtins::code;
32
use crate::bytecode;
3+
use crate::VirtualMachine;
44
use std::collections::HashMap;
55

66
pub fn map_frozen<'a>(
7-
ctx: &'a PyContext,
7+
vm: &'a VirtualMachine,
88
i: impl IntoIterator<Item = (String, bytecode::FrozenModule)> + 'a,
99
) -> impl Iterator<Item = (String, code::FrozenModule)> + 'a {
1010
i.into_iter()
1111
.map(move |(k, bytecode::FrozenModule { code, package })| {
1212
(
1313
k,
1414
code::FrozenModule {
15-
code: ctx.map_codeobj(code),
15+
code: vm.map_codeobj(code),
1616
package,
1717
},
1818
)
1919
})
2020
}
2121

22-
pub fn get_module_inits(ctx: &PyContext) -> HashMap<String, code::FrozenModule> {
22+
pub fn get_module_inits(vm: &VirtualMachine) -> HashMap<String, code::FrozenModule> {
2323
let mut modules = HashMap::new();
2424

2525
macro_rules! ext_modules {
2626
($($t:tt)*) => {
27-
modules.extend(map_frozen(ctx, py_freeze!($($t)*)));
27+
modules.extend(map_frozen(vm, py_freeze!($($t)*)));
2828
};
2929
}
3030

@@ -48,7 +48,7 @@ pub fn get_module_inits(ctx: &PyContext) -> HashMap<String, code::FrozenModule>
4848
// if we're on freeze-stdlib, the core stdlib modules will be included anyway
4949
#[cfg(feature = "freeze-stdlib")]
5050
{
51-
modules.extend(map_frozen(ctx, rustpython_pylib::frozen_stdlib()));
51+
modules.extend(map_frozen(vm, rustpython_pylib::frozen_stdlib()));
5252
}
5353

5454
modules

0 commit comments

Comments
 (0)