1
1
use crate :: obj:: objbool;
2
+ use crate :: obj:: objstr:: PyString ;
2
3
use crate :: pyhash;
3
4
use crate :: pyobject:: { IdProtocol , PyObjectRef , PyResult } ;
4
5
use crate :: vm:: VirtualMachine ;
6
+ use num_bigint:: ToBigInt ;
5
7
/// Ordered dictionary implementation.
6
8
/// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
7
9
/// And: https://www.youtube.com/watch?v=p33CVV29OG8
@@ -93,7 +95,7 @@ impl<T: Clone> Dict<T> {
93
95
}
94
96
}
95
97
96
- pub fn contains ( & self , vm : & VirtualMachine , key : & PyObjectRef ) -> PyResult < bool > {
98
+ pub fn contains < K : DictKey > ( & self , vm : & VirtualMachine , key : & K ) -> PyResult < bool > {
97
99
if let LookupResult :: Existing ( _) = self . lookup ( vm, key) ? {
98
100
Ok ( true )
99
101
} else {
@@ -111,7 +113,7 @@ impl<T: Clone> Dict<T> {
111
113
112
114
/// Retrieve a key
113
115
#[ cfg_attr( feature = "flame-it" , flame( "Dict" ) ) ]
114
- pub fn get ( & self , vm : & VirtualMachine , key : & PyObjectRef ) -> PyResult < Option < T > > {
116
+ pub fn get < K : DictKey > ( & self , vm : & VirtualMachine , key : & K ) -> PyResult < Option < T > > {
115
117
if let LookupResult :: Existing ( index) = self . lookup ( vm, key) ? {
116
118
Ok ( Some ( self . unchecked_get ( index) ) )
117
119
} else {
@@ -149,7 +151,7 @@ impl<T: Clone> Dict<T> {
149
151
key : & PyObjectRef ,
150
152
value : T ,
151
153
) -> PyResult < ( ) > {
152
- match self . lookup ( vm, & key) ? {
154
+ match self . lookup ( vm, key) ? {
153
155
LookupResult :: Existing ( entry_index) => self . unchecked_delete ( entry_index) ,
154
156
LookupResult :: NewIndex {
155
157
hash_value,
@@ -199,8 +201,8 @@ impl<T: Clone> Dict<T> {
199
201
200
202
/// Lookup the index for the given key.
201
203
#[ cfg_attr( feature = "flame-it" , flame( "Dict" ) ) ]
202
- fn lookup ( & self , vm : & VirtualMachine , key : & PyObjectRef ) -> PyResult < LookupResult > {
203
- let hash_value = collection_hash ( vm, key ) ?;
204
+ fn lookup < K : DictKey > ( & self , vm : & VirtualMachine , key : & K ) -> PyResult < LookupResult > {
205
+ let hash_value = key . do_hash ( vm) ?;
204
206
let perturb = hash_value;
205
207
let mut hash_index: HashIndex = hash_value;
206
208
loop {
@@ -209,11 +211,11 @@ impl<T: Clone> Dict<T> {
209
211
let index = self . indices [ & hash_index] ;
210
212
if let Some ( entry) = & self . entries [ index] {
211
213
// Okay, we have an entry at this place
212
- if entry . key . is ( key) {
214
+ if key. do_is ( & entry . key ) {
213
215
// Literally the same object
214
216
break Ok ( LookupResult :: Existing ( index) ) ;
215
217
} else if entry. hash == hash_value {
216
- if do_eq ( vm, & entry. key , key) ? {
218
+ if key . do_eq ( vm, & entry. key ) ? {
217
219
break Ok ( LookupResult :: Existing ( index) ) ;
218
220
} else {
219
221
// entry mismatch.
@@ -242,7 +244,7 @@ impl<T: Clone> Dict<T> {
242
244
}
243
245
244
246
/// Retrieve and delete a key
245
- pub fn pop ( & mut self , vm : & VirtualMachine , key : & PyObjectRef ) -> PyResult < Option < T > > {
247
+ pub fn pop < K : DictKey > ( & mut self , vm : & VirtualMachine , key : & K ) -> PyResult < Option < T > > {
246
248
if let LookupResult :: Existing ( index) = self . lookup ( vm, key) ? {
247
249
let value = self . unchecked_get ( index) ;
248
250
self . unchecked_delete ( index) ;
@@ -273,23 +275,68 @@ enum LookupResult {
273
275
Existing ( EntryIndex ) , // Existing record, index into entries
274
276
}
275
277
276
- #[ cfg_attr( feature = "flame-it" , flame( ) ) ]
277
- fn collection_hash ( vm : & VirtualMachine , object : & PyObjectRef ) -> PyResult < HashValue > {
278
- let raw_hash = vm. _hash ( object) ?;
279
- let mut hasher = DefaultHasher :: new ( ) ;
280
- raw_hash. hash ( & mut hasher) ;
281
- Ok ( hasher. finish ( ) as HashValue )
278
+ /// Types implementing this trait can be used to index
279
+ /// the dictionary. Typical usecases are:
280
+ /// - PyObjectRef -> arbitrary python type used as key
281
+ /// - str -> string reference used as key, this is often used internally
282
+ pub trait DictKey {
283
+ fn do_hash ( & self , vm : & VirtualMachine ) -> PyResult < HashValue > ;
284
+ fn do_is ( & self , other : & PyObjectRef ) -> bool ;
285
+ fn do_eq ( & self , vm : & VirtualMachine , other_key : & PyObjectRef ) -> PyResult < bool > ;
282
286
}
283
287
284
- /// Invoke __eq__ on two keys
285
- fn do_eq ( vm : & VirtualMachine , key1 : & PyObjectRef , key2 : & PyObjectRef ) -> Result < bool , PyObjectRef > {
286
- let result = vm. _eq ( key1. clone ( ) , key2. clone ( ) ) ?;
287
- objbool:: boolval ( vm, result)
288
+ /// Implement trait for PyObjectRef such that we can use python objects
289
+ /// to index dictionaries.
290
+ impl DictKey for PyObjectRef {
291
+ fn do_hash ( & self , vm : & VirtualMachine ) -> PyResult < HashValue > {
292
+ let raw_hash = vm. _hash ( self ) ?;
293
+ let mut hasher = DefaultHasher :: new ( ) ;
294
+ raw_hash. hash ( & mut hasher) ;
295
+ Ok ( hasher. finish ( ) as HashValue )
296
+ }
297
+
298
+ fn do_is ( & self , other : & PyObjectRef ) -> bool {
299
+ self . is ( other)
300
+ }
301
+
302
+ fn do_eq ( & self , vm : & VirtualMachine , other_key : & PyObjectRef ) -> PyResult < bool > {
303
+ let result = vm. _eq ( self . clone ( ) , other_key. clone ( ) ) ?;
304
+ objbool:: boolval ( vm, result)
305
+ }
306
+ }
307
+
308
+ /// Implement trait for the str type, so that we can use strings
309
+ /// to index dictionaries.
310
+ impl DictKey for String {
311
+ fn do_hash ( & self , _vm : & VirtualMachine ) -> PyResult < HashValue > {
312
+ // follow a similar route as the hashing of PyStringRef
313
+ let raw_hash = pyhash:: hash_value ( self ) . to_bigint ( ) . unwrap ( ) ;
314
+ let raw_hash = pyhash:: hash_bigint ( & raw_hash) ;
315
+ let mut hasher = DefaultHasher :: new ( ) ;
316
+ raw_hash. hash ( & mut hasher) ;
317
+ Ok ( hasher. finish ( ) as HashValue )
318
+ }
319
+
320
+ fn do_is ( & self , _other : & PyObjectRef ) -> bool {
321
+ // No matter who the other pyobject is, we are never the same thing, since
322
+ // we are a str, not a pyobject.
323
+ false
324
+ }
325
+
326
+ fn do_eq ( & self , vm : & VirtualMachine , other_key : & PyObjectRef ) -> PyResult < bool > {
327
+ if let Some ( py_str_value) = other_key. payload :: < PyString > ( ) {
328
+ Ok ( & py_str_value. value == self )
329
+ } else {
330
+ // Fall back to PyString implementation.
331
+ let s = vm. new_str ( self . to_string ( ) ) ;
332
+ s. do_eq ( vm, other_key)
333
+ }
334
+ }
288
335
}
289
336
290
337
#[ cfg( test) ]
291
338
mod tests {
292
- use super :: { Dict , VirtualMachine } ;
339
+ use super :: { Dict , DictKey , VirtualMachine } ;
293
340
294
341
#[ test]
295
342
fn test_insert ( ) {
@@ -313,9 +360,40 @@ mod tests {
313
360
dict. delete ( & vm, & key1) . unwrap ( ) ;
314
361
assert_eq ! ( 1 , dict. len( ) ) ;
315
362
316
- dict. insert ( & vm, & key1, value2) . unwrap ( ) ;
363
+ dict. insert ( & vm, & key1, value2. clone ( ) ) . unwrap ( ) ;
317
364
assert_eq ! ( 2 , dict. len( ) ) ;
318
365
319
366
assert_eq ! ( true , dict. contains( & vm, & key1) . unwrap( ) ) ;
367
+ assert_eq ! ( true , dict. contains( & vm, & "x" . to_string( ) ) . unwrap( ) ) ;
368
+
369
+ let val = dict. get ( & vm, & "x" . to_string ( ) ) . unwrap ( ) . unwrap ( ) ;
370
+ vm. _eq ( val, value2)
371
+ . expect ( "retrieved value must be equal to inserted value." ) ;
372
+ }
373
+
374
+ macro_rules! hash_tests {
375
+ ( $( $name: ident: $example_hash: expr, ) * ) => {
376
+ $(
377
+ #[ test]
378
+ fn $name( ) {
379
+ check_hash_equivalence( $example_hash) ;
380
+ }
381
+ ) *
382
+ }
383
+ }
384
+
385
+ hash_tests ! {
386
+ test_abc: "abc" ,
387
+ test_x: "x" ,
388
+ }
389
+
390
+ fn check_hash_equivalence ( text : & str ) {
391
+ let vm: VirtualMachine = Default :: default ( ) ;
392
+ let value1 = text. to_string ( ) ;
393
+ let value2 = vm. new_str ( value1. clone ( ) ) ;
394
+
395
+ let hash1 = value1. do_hash ( & vm) . expect ( "Hash should not fail." ) ;
396
+ let hash2 = value2. do_hash ( & vm) . expect ( "Hash should not fail." ) ;
397
+ assert_eq ! ( hash1, hash2) ;
320
398
}
321
399
}
0 commit comments