Skip to content

Implement dictionary indexing by trait. #1187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 98 additions & 20 deletions vm/src/dictdatatype.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use crate::obj::objbool;
use crate::obj::objstr::PyString;
use crate::pyhash;
use crate::pyobject::{IdProtocol, PyObjectRef, PyResult};
use crate::vm::VirtualMachine;
use num_bigint::ToBigInt;
/// Ordered dictionary implementation.
/// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
/// And: https://www.youtube.com/watch?v=p33CVV29OG8
Expand Down Expand Up @@ -93,7 +95,7 @@ impl<T: Clone> Dict<T> {
}
}

pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<bool> {
pub fn contains<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<bool> {
if let LookupResult::Existing(_) = self.lookup(vm, key)? {
Ok(true)
} else {
Expand All @@ -111,7 +113,7 @@ impl<T: Clone> Dict<T> {

/// Retrieve a key
#[cfg_attr(feature = "flame-it", flame("Dict"))]
pub fn get(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
pub fn get<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
Ok(Some(self.unchecked_get(index)))
} else {
Expand Down Expand Up @@ -149,7 +151,7 @@ impl<T: Clone> Dict<T> {
key: &PyObjectRef,
value: T,
) -> PyResult<()> {
match self.lookup(vm, &key)? {
match self.lookup(vm, key)? {
LookupResult::Existing(entry_index) => self.unchecked_delete(entry_index),
LookupResult::NewIndex {
hash_value,
Expand Down Expand Up @@ -199,8 +201,8 @@ impl<T: Clone> Dict<T> {

/// Lookup the index for the given key.
#[cfg_attr(feature = "flame-it", flame("Dict"))]
fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<LookupResult> {
let hash_value = collection_hash(vm, key)?;
fn lookup<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<LookupResult> {
let hash_value = key.do_hash(vm)?;
let perturb = hash_value;
let mut hash_index: HashIndex = hash_value;
loop {
Expand All @@ -209,11 +211,11 @@ impl<T: Clone> Dict<T> {
let index = self.indices[&hash_index];
if let Some(entry) = &self.entries[index] {
// Okay, we have an entry at this place
if entry.key.is(key) {
if key.do_is(&entry.key) {
// Literally the same object
break Ok(LookupResult::Existing(index));
} else if entry.hash == hash_value {
if do_eq(vm, &entry.key, key)? {
if key.do_eq(vm, &entry.key)? {
break Ok(LookupResult::Existing(index));
} else {
// entry mismatch.
Expand Down Expand Up @@ -242,7 +244,7 @@ impl<T: Clone> Dict<T> {
}

/// Retrieve and delete a key
pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
pub fn pop<K: DictKey>(&mut self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
let value = self.unchecked_get(index);
self.unchecked_delete(index);
Expand Down Expand Up @@ -273,23 +275,68 @@ enum LookupResult {
Existing(EntryIndex), // Existing record, index into entries
}

#[cfg_attr(feature = "flame-it", flame())]
fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult<HashValue> {
let raw_hash = vm._hash(object)?;
let mut hasher = DefaultHasher::new();
raw_hash.hash(&mut hasher);
Ok(hasher.finish() as HashValue)
/// Types implementing this trait can be used to index
/// the dictionary. Typical usecases are:
/// - PyObjectRef -> arbitrary python type used as key
/// - str -> string reference used as key, this is often used internally
pub trait DictKey {
fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue>;
fn do_is(&self, other: &PyObjectRef) -> bool;
fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool>;
}

/// Invoke __eq__ on two keys
fn do_eq(vm: &VirtualMachine, key1: &PyObjectRef, key2: &PyObjectRef) -> Result<bool, PyObjectRef> {
let result = vm._eq(key1.clone(), key2.clone())?;
objbool::boolval(vm, result)
/// Implement trait for PyObjectRef such that we can use python objects
/// to index dictionaries.
impl DictKey for PyObjectRef {
fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue> {
let raw_hash = vm._hash(self)?;
let mut hasher = DefaultHasher::new();
raw_hash.hash(&mut hasher);
Ok(hasher.finish() as HashValue)
}

fn do_is(&self, other: &PyObjectRef) -> bool {
self.is(other)
}

fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
let result = vm._eq(self.clone(), other_key.clone())?;
objbool::boolval(vm, result)
}
}

/// Implement trait for the str type, so that we can use strings
/// to index dictionaries.
impl DictKey for String {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
impl DictKey for String {
impl DictKey for str {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried this, without success. I get this compilation error:

 --> vm/src/dictdatatype.rs:363:24
    |
363 |         let val = dict.get(&vm, "x").unwrap().unwrap();
    |                        ^^^ doesn't have a size known at compile-time

Copy link
Member

@coolreader18 coolreader18 Jul 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think it's because it's trying to make a double-fat pointer, with both a vtable and a slice length, and it doesn't like that.

fn do_hash(&self, _vm: &VirtualMachine) -> PyResult<HashValue> {
// follow a similar route as the hashing of PyStringRef
let raw_hash = pyhash::hash_value(self).to_bigint().unwrap();
let raw_hash = pyhash::hash_bigint(&raw_hash);
let mut hasher = DefaultHasher::new();
raw_hash.hash(&mut hasher);
Ok(hasher.finish() as HashValue)
}

fn do_is(&self, _other: &PyObjectRef) -> bool {
// No matter who the other pyobject is, we are never the same thing, since
// we are a str, not a pyobject.
false
}

fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
if let Some(py_str_value) = other_key.payload::<PyString>() {
Ok(&py_str_value.value == self)
} else {
// Fall back to PyString implementation.
let s = vm.new_str(self.to_string());
s.do_eq(vm, other_key)
}
}
}

#[cfg(test)]
mod tests {
use super::{Dict, VirtualMachine};
use super::{Dict, DictKey, VirtualMachine};

#[test]
fn test_insert() {
Expand All @@ -313,9 +360,40 @@ mod tests {
dict.delete(&vm, &key1).unwrap();
assert_eq!(1, dict.len());

dict.insert(&vm, &key1, value2).unwrap();
dict.insert(&vm, &key1, value2.clone()).unwrap();
assert_eq!(2, dict.len());

assert_eq!(true, dict.contains(&vm, &key1).unwrap());
assert_eq!(true, dict.contains(&vm, &"x".to_string()).unwrap());

let val = dict.get(&vm, &"x".to_string()).unwrap().unwrap();
vm._eq(val, value2)
.expect("retrieved value must be equal to inserted value.");
}

macro_rules! hash_tests {
($($name:ident: $example_hash:expr,)*) => {
$(
#[test]
fn $name() {
check_hash_equivalence($example_hash);
}
)*
}
}

hash_tests! {
test_abc: "abc",
test_x: "x",
}

fn check_hash_equivalence(text: &str) {
let vm: VirtualMachine = Default::default();
let value1 = text.to_string();
let value2 = vm.new_str(value1.clone());

let hash1 = value1.do_hash(&vm).expect("Hash should not fail.");
let hash2 = value2.do_hash(&vm).expect("Hash should not fail.");
assert_eq!(hash1, hash2);
}
}
18 changes: 18 additions & 0 deletions vm/src/obj/objdict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::vm::{ReprGuard, VirtualMachine};
use super::objbool;
use super::objiter;
use super::objstr;
use super::objtype;
use crate::dictdatatype;
use crate::obj::objtype::PyClassRef;
use crate::pyobject::PyClassImpl;
Expand Down Expand Up @@ -316,6 +317,23 @@ impl PyDictRef {
pub fn size(&self) -> dictdatatype::DictSize {
self.entries.borrow().size()
}

pub fn get_item_option<T: IntoPyObject>(
&self,
key: T,
vm: &VirtualMachine,
) -> PyResult<Option<PyObjectRef>> {
match self.get_item(key, vm) {
Ok(value) => Ok(Some(value)),
Err(exc) => {
if objtype::isinstance(&exc, &vm.ctx.exceptions.key_error) {
Ok(None)
} else {
Err(exc)
}
}
}
}
}

impl ItemProtocol for PyDictRef {
Expand Down
5 changes: 1 addition & 4 deletions vm/src/obj/objint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,10 +418,7 @@ impl PyInt {

#[pymethod(name = "__hash__")]
pub fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
match self.value.to_i64() {
Some(value) => (value % pyhash::MODULUS as i64),
None => (&self.value % pyhash::MODULUS).to_i64().unwrap(),
}
pyhash::hash_bigint(&self.value)
}

#[pymethod(name = "__abs__")]
Expand Down
2 changes: 1 addition & 1 deletion vm/src/obj/objsuper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::obj::objfunction::PyMethod;
use crate::obj::objstr;
use crate::obj::objtype::{PyClass, PyClassRef};
use crate::pyobject::{
ItemProtocol, PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol,
PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol,
};
use crate::scope::NameProtocol;
use crate::vm::VirtualMachine;
Expand Down
9 changes: 9 additions & 0 deletions vm/src/pyhash.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use num_bigint::BigInt;
use num_traits::ToPrimitive;
use std::hash::{Hash, Hasher};

use crate::obj::objfloat;
Expand Down Expand Up @@ -81,3 +83,10 @@ pub fn hash_iter<'a, I: std::iter::Iterator<Item = &'a PyObjectRef>>(
}
Ok(hasher.finish() as PyHash)
}

pub fn hash_bigint(value: &BigInt) -> PyHash {
match value.to_i64() {
Some(i64_value) => (i64_value % MODULUS as i64),
None => (value % MODULUS).to_i64().unwrap(),
}
}
18 changes: 0 additions & 18 deletions vm/src/pyobject.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1044,24 +1044,6 @@ pub trait ItemProtocol {
vm: &VirtualMachine,
) -> PyResult;
fn del_item<T: IntoPyObject>(&self, key: T, vm: &VirtualMachine) -> PyResult;

#[cfg_attr(feature = "flame-it", flame("ItemProtocol"))]
fn get_item_option<T: IntoPyObject>(
&self,
key: T,
vm: &VirtualMachine,
) -> PyResult<Option<PyObjectRef>> {
match self.get_item(key, vm) {
Ok(value) => Ok(Some(value)),
Err(exc) => {
if objtype::isinstance(&exc, &vm.ctx.exceptions.key_error) {
Ok(None)
} else {
Err(exc)
}
}
}
}
}

impl ItemProtocol for PyObjectRef {
Expand Down