Skip to content

Commit 64b951e

Browse files
authored
Merge pull request RustPython#2325 from RustPython/coolreader18/string-intern
Add string interning
2 parents d76cb3b + 9214f41 commit 64b951e

File tree

29 files changed

+706
-423
lines changed

29 files changed

+706
-423
lines changed

Lib/test/test_pathlib.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,8 +2044,6 @@ def test_pickling_common(self):
20442044
pp = pickle.loads(dumped)
20452045
self.assertEqual(pp.stat(), p.stat())
20462046

2047-
# TODO: RUSTPYTHON
2048-
@unittest.expectedFailure
20492047
def test_parts_interning(self):
20502048
P = self.cls
20512049
p = P('/usr/bin/foo')

Lib/test/test_sys.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,6 @@ def test_43581(self):
539539
# the test runs under regrtest.
540540
self.assertEqual(sys.__stdout__.encoding, sys.__stderr__.encoding)
541541

542-
@unittest.skip("TODO: RUSTPYTHON; sys.intern() string interning")
543542
def test_intern(self):
544543
global INTERN_NUMRUNS
545544
INTERN_NUMRUNS += 1

bytecode/src/bytecode.rs

Lines changed: 122 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ impl Location {
3232
}
3333

3434
pub trait Constant: Sized {
35+
type Name: AsRef<str>;
3536
fn borrow_constant(&self) -> BorrowedConstant<Self>;
3637
fn into_data(self) -> ConstantData {
3738
self.borrow_constant().into_data()
@@ -41,6 +42,7 @@ pub trait Constant: Sized {
4142
}
4243
}
4344
impl Constant for ConstantData {
45+
type Name = String;
4446
fn borrow_constant(&self) -> BorrowedConstant<Self> {
4547
use BorrowedConstant::*;
4648
match self {
@@ -69,6 +71,10 @@ pub trait ConstantBag: Sized {
6971
fn make_constant_borrowed<C: Constant>(&self, constant: BorrowedConstant<C>) -> Self::Constant {
7072
self.make_constant(constant.into_data())
7173
}
74+
fn make_name(&self, name: String) -> <Self::Constant as Constant>::Name;
75+
fn make_name_ref(&self, name: &str) -> <Self::Constant as Constant>::Name {
76+
self.make_name(name.to_owned())
77+
}
7278
}
7379

7480
#[derive(Clone)]
@@ -78,8 +84,8 @@ impl ConstantBag for BasicBag {
7884
fn make_constant(&self, constant: ConstantData) -> Self::Constant {
7985
constant
8086
}
81-
fn make_constant_borrowed<C: Constant>(&self, constant: BorrowedConstant<C>) -> Self::Constant {
82-
constant.into_data()
87+
fn make_name(&self, name: String) -> <Self::Constant as Constant>::Name {
88+
name
8389
}
8490
}
8591

@@ -93,14 +99,17 @@ pub struct CodeObject<C: Constant = ConstantData> {
9399
pub locations: Vec<Location>,
94100
pub flags: CodeFlags,
95101
pub posonlyarg_count: usize, // Number of positional-only arguments
96-
pub arg_names: Vec<String>, // Names of positional arguments
97-
pub varargs_name: Option<String>, // *args or *
98-
pub kwonlyarg_names: Vec<String>,
99-
pub varkeywords_name: Option<String>, // **kwargs or **
102+
pub arg_count: usize,
103+
pub kwonlyarg_count: usize,
100104
pub source_path: String,
101105
pub first_line_number: usize,
102106
pub obj_name: String, // Name of the object that created this code object
103107
pub constants: Vec<C>,
108+
#[serde(bound(
109+
deserialize = "C::Name: serde::Deserialize<'de>",
110+
serialize = "C::Name: serde::Serialize"
111+
))]
112+
pub names: Vec<C::Name>,
104113
}
105114

106115
bitflags! {
@@ -172,37 +181,39 @@ pub enum ConversionFlag {
172181
Repr,
173182
}
174183

184+
pub type NameIdx = usize;
185+
175186
/// A Single bytecode instruction.
176187
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
177188
pub enum Instruction {
178189
Import {
179-
name: Option<String>,
180-
symbols: Vec<String>,
190+
name_idx: Option<NameIdx>,
191+
symbols_idx: Vec<NameIdx>,
181192
level: usize,
182193
},
183194
ImportStar,
184195
ImportFrom {
185-
name: String,
196+
idx: NameIdx,
186197
},
187198
LoadName {
188-
name: String,
199+
idx: NameIdx,
189200
scope: NameScope,
190201
},
191202
StoreName {
192-
name: String,
203+
idx: NameIdx,
193204
scope: NameScope,
194205
},
195206
DeleteName {
196-
name: String,
207+
idx: NameIdx,
197208
},
198209
Subscript,
199210
StoreSubscript,
200211
DeleteSubscript,
201212
StoreAttr {
202-
name: String,
213+
idx: NameIdx,
203214
},
204215
DeleteAttr {
205-
name: String,
216+
idx: NameIdx,
206217
},
207218
LoadConst {
208219
/// index into constants vec
@@ -216,7 +227,7 @@ pub enum Instruction {
216227
inplace: bool,
217228
},
218229
LoadAttr {
219-
name: String,
230+
idx: NameIdx,
220231
},
221232
CompareOperation {
222233
op: ComparisonOperator,
@@ -503,15 +514,37 @@ pub enum BlockType {
503514
}
504515
*/
505516

517+
pub struct Arguments<'a, N: AsRef<str>> {
518+
pub posonlyargs: &'a [N],
519+
pub args: &'a [N],
520+
pub vararg: Option<&'a N>,
521+
pub kwonlyargs: &'a [N],
522+
pub varkwarg: Option<&'a N>,
523+
}
524+
525+
impl<N: AsRef<str>> fmt::Debug for Arguments<'_, N> {
526+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
527+
macro_rules! fmt_slice {
528+
($x:expr) => {
529+
format_args!("[{}]", $x.iter().map(AsRef::as_ref).format(", "))
530+
};
531+
}
532+
f.debug_struct("Arguments")
533+
.field("posonlyargs", &fmt_slice!(self.posonlyargs))
534+
.field("args", &fmt_slice!(self.args))
535+
.field("vararg", &self.vararg.map(N::as_ref))
536+
.field("kwonlyargs", &fmt_slice!(self.kwonlyargs))
537+
.field("varkwarg", &self.varkwarg.map(N::as_ref))
538+
.finish()
539+
}
540+
}
541+
506542
impl<C: Constant> CodeObject<C> {
507-
#[allow(clippy::too_many_arguments)]
508543
pub fn new(
509544
flags: CodeFlags,
510545
posonlyarg_count: usize,
511-
arg_names: Vec<String>,
512-
varargs_name: Option<String>,
513-
kwonlyarg_names: Vec<String>,
514-
varkeywords_name: Option<String>,
546+
arg_count: usize,
547+
kwonlyarg_count: usize,
515548
source_path: String,
516549
first_line_number: usize,
517550
obj_name: String,
@@ -522,40 +555,45 @@ impl<C: Constant> CodeObject<C> {
522555
locations: Vec::new(),
523556
flags,
524557
posonlyarg_count,
525-
arg_names,
526-
varargs_name,
527-
kwonlyarg_names,
528-
varkeywords_name,
558+
arg_count,
559+
kwonlyarg_count,
529560
source_path,
530561
first_line_number,
531562
obj_name,
532563
constants: Vec::new(),
564+
names: Vec::new(),
533565
}
534566
}
535567

536-
pub fn varnames(&self) -> impl Iterator<Item = &str> + '_ {
537-
self.arg_names
538-
.iter()
539-
.map(String::as_str)
540-
.chain(self.kwonlyarg_names.iter().map(String::as_str))
541-
.chain(self.varargs_name.as_deref())
542-
.chain(self.varkeywords_name.as_deref())
543-
.chain(
544-
self.instructions
545-
.iter()
546-
.filter_map(|i| match i {
547-
Instruction::LoadName {
548-
name,
549-
scope: NameScope::Local,
550-
}
551-
| Instruction::StoreName {
552-
name,
553-
scope: NameScope::Local,
554-
} => Some(name.as_str()),
555-
_ => None,
556-
})
557-
.unique(),
558-
)
568+
// like inspect.getargs
569+
pub fn arg_names(&self) -> Arguments<C::Name> {
570+
let nargs = self.arg_count;
571+
let nkwargs = self.kwonlyarg_count;
572+
let mut varargspos = nargs + nkwargs;
573+
let posonlyargs = &self.names[..self.posonlyarg_count];
574+
let args = &self.names[..nargs];
575+
let kwonlyargs = &self.names[nargs..varargspos];
576+
577+
let vararg = if self.flags.contains(CodeFlags::HAS_VARARGS) {
578+
let vararg = &self.names[varargspos];
579+
varargspos += 1;
580+
Some(vararg)
581+
} else {
582+
None
583+
};
584+
let varkwarg = if self.flags.contains(CodeFlags::HAS_VARKEYWORDS) {
585+
Some(&self.names[varargspos])
586+
} else {
587+
None
588+
};
589+
590+
Arguments {
591+
posonlyargs,
592+
args,
593+
vararg,
594+
kwonlyargs,
595+
varkwarg,
596+
}
559597
}
560598

561599
fn display_inner(
@@ -579,6 +617,7 @@ impl<C: Constant> CodeObject<C> {
579617
f,
580618
&self.label_map,
581619
&self.constants,
620+
&self.names,
582621
expand_codeobjects,
583622
level,
584623
)?;
@@ -603,16 +642,19 @@ impl<C: Constant> CodeObject<C> {
603642
.into_iter()
604643
.map(|x| x.map_constant(bag))
605644
.collect(),
645+
names: self
646+
.names
647+
.into_iter()
648+
.map(|x| bag.make_name_ref(x.as_ref()))
649+
.collect(),
606650

607651
instructions: self.instructions,
608652
label_map: self.label_map,
609653
locations: self.locations,
610654
flags: self.flags,
611655
posonlyarg_count: self.posonlyarg_count,
612-
arg_names: self.arg_names,
613-
varargs_name: self.varargs_name,
614-
kwonlyarg_names: self.kwonlyarg_names,
615-
varkeywords_name: self.varkeywords_name,
656+
arg_count: self.arg_count,
657+
kwonlyarg_count: self.kwonlyarg_count,
616658
source_path: self.source_path,
617659
first_line_number: self.first_line_number,
618660
obj_name: self.obj_name,
@@ -626,16 +668,19 @@ impl<C: Constant> CodeObject<C> {
626668
.iter()
627669
.map(|x| bag.make_constant_borrowed(x.borrow_constant()))
628670
.collect(),
671+
names: self
672+
.names
673+
.iter()
674+
.map(|x| bag.make_name_ref(x.as_ref()))
675+
.collect(),
629676

630677
instructions: self.instructions.clone(),
631678
label_map: self.label_map.clone(),
632679
locations: self.locations.clone(),
633680
flags: self.flags,
634681
posonlyarg_count: self.posonlyarg_count,
635-
arg_names: self.arg_names.clone(),
636-
varargs_name: self.varargs_name.clone(),
637-
kwonlyarg_names: self.kwonlyarg_names.clone(),
638-
varkeywords_name: self.varkeywords_name.clone(),
682+
arg_count: self.arg_count,
683+
kwonlyarg_count: self.kwonlyarg_count,
639684
source_path: self.source_path.clone(),
640685
first_line_number: self.first_line_number,
641686
obj_name: self.obj_name.clone(),
@@ -677,6 +722,7 @@ impl Instruction {
677722
f: &mut fmt::Formatter,
678723
label_map: &BTreeMap<Label, usize>,
679724
constants: &[C],
725+
names: &[C::Name],
680726
expand_codeobjects: bool,
681727
level: usize,
682728
) -> fmt::Result {
@@ -704,25 +750,31 @@ impl Instruction {
704750

705751
match self {
706752
Import {
707-
name,
708-
symbols,
753+
name_idx,
754+
symbols_idx,
709755
level,
710756
} => w!(
711757
Import,
712-
format!("{:?}", name),
713-
format!("{:?}", symbols),
758+
format!("{:?}", name_idx.map(|idx| names[idx].as_ref())),
759+
format!(
760+
"({:?})",
761+
symbols_idx
762+
.iter()
763+
.map(|&idx| names[idx].as_ref())
764+
.format(", ")
765+
),
714766
level
715767
),
716768
ImportStar => w!(ImportStar),
717-
ImportFrom { name } => w!(ImportFrom, name),
718-
LoadName { name, scope } => w!(LoadName, name, format!("{:?}", scope)),
719-
StoreName { name, scope } => w!(StoreName, name, format!("{:?}", scope)),
720-
DeleteName { name } => w!(DeleteName, name),
769+
ImportFrom { idx } => w!(ImportFrom, names[*idx].as_ref()),
770+
LoadName { idx, scope } => w!(LoadName, names[*idx].as_ref(), format!("{:?}", scope)),
771+
StoreName { idx, scope } => w!(StoreName, names[*idx].as_ref(), format!("{:?}", scope)),
772+
DeleteName { idx } => w!(DeleteName, names[*idx].as_ref()),
721773
Subscript => w!(Subscript),
722774
StoreSubscript => w!(StoreSubscript),
723775
DeleteSubscript => w!(DeleteSubscript),
724-
StoreAttr { name } => w!(StoreAttr, name),
725-
DeleteAttr { name } => w!(DeleteAttr, name),
776+
StoreAttr { idx } => w!(StoreAttr, names[*idx].as_ref()),
777+
DeleteAttr { idx } => w!(DeleteAttr, names[*idx].as_ref()),
726778
LoadConst { idx } => {
727779
let value = &constants[*idx];
728780
match value.borrow_constant() {
@@ -740,7 +792,7 @@ impl Instruction {
740792
}
741793
UnaryOperation { op } => w!(UnaryOperation, format!("{:?}", op)),
742794
BinaryOperation { op, inplace } => w!(BinaryOperation, format!("{:?}", op), inplace),
743-
LoadAttr { name } => w!(LoadAttr, name),
795+
LoadAttr { idx } => w!(LoadAttr, names[*idx].as_ref()),
744796
CompareOperation { op } => w!(CompareOperation, format!("{:?}", op)),
745797
Pop => w!(Pop),
746798
Rotate { amount } => w!(Rotate, amount),
@@ -818,6 +870,10 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
818870

819871
#[derive(Serialize, Deserialize)]
820872
pub struct FrozenModule<C: Constant = ConstantData> {
873+
#[serde(bound(
874+
deserialize = "C: serde::Deserialize<'de>, C::Name: serde::Deserialize<'de>",
875+
serialize = "C: serde::Serialize, C::Name: serde::Serialize"
876+
))]
821877
pub code: CodeObject<C>,
822878
pub package: bool,
823879
}

0 commit comments

Comments
 (0)