From 38d9f4087e9647d1a258a8f753851d76b2b61fd3 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Sun, 28 Jul 2019 13:43:14 +0200 Subject: [PATCH 1/2] Implement dictionary indexing by trait. --- vm/src/dictdatatype.rs | 112 +++++++++++++++++++++++++++++++++-------- vm/src/obj/objint.rs | 5 +- vm/src/pyhash.rs | 9 ++++ 3 files changed, 102 insertions(+), 24 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 8b20fd34a0..c73df2753c 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -2,6 +2,7 @@ use crate::obj::objbool; use crate::pyhash; use crate::pyobject::{IdProtocol, PyObjectRef, PyResult}; use crate::vm::VirtualMachine; +use num_bigint::ToBigInt; /// Ordered dictionary implementation. /// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html /// And: https://www.youtube.com/watch?v=p33CVV29OG8 @@ -93,7 +94,7 @@ impl Dict { } } - pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult { + pub fn contains(&self, vm: &VirtualMachine, key: &K) -> PyResult { if let LookupResult::Existing(_) = self.lookup(vm, key)? { Ok(true) } else { @@ -111,7 +112,7 @@ impl Dict { /// Retrieve a key #[cfg_attr(feature = "flame-it", flame("Dict"))] - pub fn get(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult> { + pub fn get(&self, vm: &VirtualMachine, key: &K) -> PyResult> { if let LookupResult::Existing(index) = self.lookup(vm, key)? { Ok(Some(self.unchecked_get(index))) } else { @@ -149,7 +150,7 @@ impl Dict { key: &PyObjectRef, value: T, ) -> PyResult<()> { - match self.lookup(vm, &key)? { + match self.lookup(vm, key)? { LookupResult::Existing(entry_index) => self.unchecked_delete(entry_index), LookupResult::NewIndex { hash_value, @@ -199,8 +200,8 @@ impl Dict { /// Lookup the index for the given key. #[cfg_attr(feature = "flame-it", flame("Dict"))] - fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult { - let hash_value = collection_hash(vm, key)?; + fn lookup(&self, vm: &VirtualMachine, key: &K) -> PyResult { + let hash_value = key.do_hash(vm)?; let perturb = hash_value; let mut hash_index: HashIndex = hash_value; loop { @@ -209,11 +210,11 @@ impl Dict { let index = self.indices[&hash_index]; if let Some(entry) = &self.entries[index] { // Okay, we have an entry at this place - if entry.key.is(key) { + if key.do_is(&entry.key) { // Literally the same object break Ok(LookupResult::Existing(index)); } else if entry.hash == hash_value { - if do_eq(vm, &entry.key, key)? { + if key.do_eq(vm, &entry.key)? { break Ok(LookupResult::Existing(index)); } else { // entry mismatch. @@ -242,7 +243,7 @@ impl Dict { } /// Retrieve and delete a key - pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult> { + pub fn pop(&mut self, vm: &VirtualMachine, key: &K) -> PyResult> { if let LookupResult::Existing(index) = self.lookup(vm, key)? { let value = self.unchecked_get(index); self.unchecked_delete(index); @@ -273,23 +274,63 @@ enum LookupResult { Existing(EntryIndex), // Existing record, index into entries } -#[cfg_attr(feature = "flame-it", flame())] -fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult { - let raw_hash = vm._hash(object)?; - let mut hasher = DefaultHasher::new(); - raw_hash.hash(&mut hasher); - Ok(hasher.finish() as HashValue) +/// Types implementing this trait can be used to index +/// the dictionary. Typical usecases are: +/// - PyObjectRef -> arbitrary python type used as key +/// - str -> string reference used as key, this is often used internally +pub trait DictKey { + fn do_hash(&self, vm: &VirtualMachine) -> PyResult; + fn do_is(&self, other: &PyObjectRef) -> bool; + fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult; } -/// Invoke __eq__ on two keys -fn do_eq(vm: &VirtualMachine, key1: &PyObjectRef, key2: &PyObjectRef) -> Result { - let result = vm._eq(key1.clone(), key2.clone())?; - objbool::boolval(vm, result) +/// Implement trait for PyObjectRef such that we can use python objects +/// to index dictionaries. +impl DictKey for PyObjectRef { + fn do_hash(&self, vm: &VirtualMachine) -> PyResult { + let raw_hash = vm._hash(self)?; + let mut hasher = DefaultHasher::new(); + raw_hash.hash(&mut hasher); + Ok(hasher.finish() as HashValue) + } + + fn do_is(&self, other: &PyObjectRef) -> bool { + self.is(other) + } + + fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult { + let result = vm._eq(self.clone(), other_key.clone())?; + objbool::boolval(vm, result) + } +} + +/// Implement trait for the str type, so that we can use strings +/// to index dictionaries. +impl DictKey for String { + fn do_hash(&self, _vm: &VirtualMachine) -> PyResult { + let raw_hash = pyhash::hash_value(self).to_bigint().unwrap(); + let raw_hash = pyhash::hash_bigint(&raw_hash); + let mut hasher = DefaultHasher::new(); + raw_hash.hash(&mut hasher); + Ok(hasher.finish() as HashValue) + } + + fn do_is(&self, _other: &PyObjectRef) -> bool { + // No matter who the other pyobject is, we are never the same thing, since + // we are a str, not a pyobject. + false + } + + fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult { + // Fall back to PyString implementation. + let s = vm.new_str(self.to_string()); + s.do_eq(vm, other_key) + } } #[cfg(test)] mod tests { - use super::{Dict, VirtualMachine}; + use super::{Dict, DictKey, VirtualMachine}; #[test] fn test_insert() { @@ -313,9 +354,40 @@ mod tests { dict.delete(&vm, &key1).unwrap(); assert_eq!(1, dict.len()); - dict.insert(&vm, &key1, value2).unwrap(); + dict.insert(&vm, &key1, value2.clone()).unwrap(); assert_eq!(2, dict.len()); assert_eq!(true, dict.contains(&vm, &key1).unwrap()); + assert_eq!(true, dict.contains(&vm, &"x".to_string()).unwrap()); + + let val = dict.get(&vm, &"x".to_string()).unwrap().unwrap(); + vm._eq(val, value2) + .expect("retrieved value must be equal to inserted value."); + } + + macro_rules! hash_tests { + ($($name:ident: $example_hash:expr,)*) => { + $( + #[test] + fn $name() { + check_hash_equivalence($example_hash); + } + )* + } + } + + hash_tests! { + test_abc: "abc", + test_x: "x", + } + + fn check_hash_equivalence(text: &str) { + let vm: VirtualMachine = Default::default(); + let value1 = text.to_string(); + let value2 = vm.new_str(value1.clone()); + + let hash1 = value1.do_hash(&vm).expect("Hash should not fail."); + let hash2 = value2.do_hash(&vm).expect("Hash should not fail."); + assert_eq!(hash1, hash2); } } diff --git a/vm/src/obj/objint.rs b/vm/src/obj/objint.rs index bf52bd0a0f..a158eb344d 100644 --- a/vm/src/obj/objint.rs +++ b/vm/src/obj/objint.rs @@ -418,10 +418,7 @@ impl PyInt { #[pymethod(name = "__hash__")] pub fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash { - match self.value.to_i64() { - Some(value) => (value % pyhash::MODULUS as i64), - None => (&self.value % pyhash::MODULUS).to_i64().unwrap(), - } + pyhash::hash_bigint(&self.value) } #[pymethod(name = "__abs__")] diff --git a/vm/src/pyhash.rs b/vm/src/pyhash.rs index 8bbd82168e..83776bd58b 100644 --- a/vm/src/pyhash.rs +++ b/vm/src/pyhash.rs @@ -1,3 +1,5 @@ +use num_bigint::BigInt; +use num_traits::ToPrimitive; use std::hash::{Hash, Hasher}; use crate::obj::objfloat; @@ -81,3 +83,10 @@ pub fn hash_iter<'a, I: std::iter::Iterator>( } Ok(hasher.finish() as PyHash) } + +pub fn hash_bigint(value: &BigInt) -> PyHash { + match value.to_i64() { + Some(i64_value) => (i64_value % MODULUS as i64), + None => (value % MODULUS).to_i64().unwrap(), + } +} From 42dca4433aef7d76538c0d1e89376b5cf64069e9 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Sun, 28 Jul 2019 20:39:22 +0200 Subject: [PATCH 2/2] Implement do_eq for String impl of DictKey. Also move get_item_option to dict type. --- vm/src/dictdatatype.rs | 12 +++++++++--- vm/src/obj/objdict.rs | 18 ++++++++++++++++++ vm/src/obj/objsuper.rs | 2 +- vm/src/pyobject.rs | 18 ------------------ 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index c73df2753c..4442ea7c64 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -1,4 +1,5 @@ use crate::obj::objbool; +use crate::obj::objstr::PyString; use crate::pyhash; use crate::pyobject::{IdProtocol, PyObjectRef, PyResult}; use crate::vm::VirtualMachine; @@ -308,6 +309,7 @@ impl DictKey for PyObjectRef { /// to index dictionaries. impl DictKey for String { fn do_hash(&self, _vm: &VirtualMachine) -> PyResult { + // follow a similar route as the hashing of PyStringRef let raw_hash = pyhash::hash_value(self).to_bigint().unwrap(); let raw_hash = pyhash::hash_bigint(&raw_hash); let mut hasher = DefaultHasher::new(); @@ -322,9 +324,13 @@ impl DictKey for String { } fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult { - // Fall back to PyString implementation. - let s = vm.new_str(self.to_string()); - s.do_eq(vm, other_key) + if let Some(py_str_value) = other_key.payload::() { + Ok(&py_str_value.value == self) + } else { + // Fall back to PyString implementation. + let s = vm.new_str(self.to_string()); + s.do_eq(vm, other_key) + } } } diff --git a/vm/src/obj/objdict.rs b/vm/src/obj/objdict.rs index 267678b206..e68c6b6a48 100644 --- a/vm/src/obj/objdict.rs +++ b/vm/src/obj/objdict.rs @@ -11,6 +11,7 @@ use crate::vm::{ReprGuard, VirtualMachine}; use super::objbool; use super::objiter; use super::objstr; +use super::objtype; use crate::dictdatatype; use crate::obj::objtype::PyClassRef; use crate::pyobject::PyClassImpl; @@ -316,6 +317,23 @@ impl PyDictRef { pub fn size(&self) -> dictdatatype::DictSize { self.entries.borrow().size() } + + pub fn get_item_option( + &self, + key: T, + vm: &VirtualMachine, + ) -> PyResult> { + match self.get_item(key, vm) { + Ok(value) => Ok(Some(value)), + Err(exc) => { + if objtype::isinstance(&exc, &vm.ctx.exceptions.key_error) { + Ok(None) + } else { + Err(exc) + } + } + } + } } impl ItemProtocol for PyDictRef { diff --git a/vm/src/obj/objsuper.rs b/vm/src/obj/objsuper.rs index 016c2419c0..a9fb668cb8 100644 --- a/vm/src/obj/objsuper.rs +++ b/vm/src/obj/objsuper.rs @@ -11,7 +11,7 @@ use crate::obj::objfunction::PyMethod; use crate::obj::objstr; use crate::obj::objtype::{PyClass, PyClassRef}; use crate::pyobject::{ - ItemProtocol, PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol, + PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol, }; use crate::scope::NameProtocol; use crate::vm::VirtualMachine; diff --git a/vm/src/pyobject.rs b/vm/src/pyobject.rs index cf249835e4..ba40d54735 100644 --- a/vm/src/pyobject.rs +++ b/vm/src/pyobject.rs @@ -1044,24 +1044,6 @@ pub trait ItemProtocol { vm: &VirtualMachine, ) -> PyResult; fn del_item(&self, key: T, vm: &VirtualMachine) -> PyResult; - - #[cfg_attr(feature = "flame-it", flame("ItemProtocol"))] - fn get_item_option( - &self, - key: T, - vm: &VirtualMachine, - ) -> PyResult> { - match self.get_item(key, vm) { - Ok(value) => Ok(Some(value)), - Err(exc) => { - if objtype::isinstance(&exc, &vm.ctx.exceptions.key_error) { - Ok(None) - } else { - Err(exc) - } - } - } - } } impl ItemProtocol for PyObjectRef {