
Commit cc330b5

Merge branch 'rhashtable-next'
Herbert Xu says:

====================
rhashtable: Multiple rehashing

This series introduces multiple rehashing.

Recall that the original implementation in br_multicast used two list pointers per hash node and therefore is limited to at most one rehash at a time, since you need one list pointer for the old table and one for the new table.

Thanks to Josh Triplett's suggestion of using a single list pointer we're no longer limited by that, so it is perfectly OK to have an arbitrary number of tables in existence at any one time.

Readers and removal simply have to walk from the oldest table to the newest table in order not to miss anything. Insertion without lookup is just as easy: we simply go to the last table that we can find and add the entry there.

However, insertion with uniqueness lookup is more complicated because we need to ensure that two simultaneous insertions of the same key do not both succeed. To achieve this, all insertions, including those without lookups, are required to obtain the bucket lock from the oldest hash table that is still alive. This is determined by having the rehasher (there is only one rehashing thread in the system) keep a pointer to where it is up to. If a bucket has already been rehashed then it is dead, i.e., there cannot be any more insertions to it; otherwise it is considered alive. This guarantees that the same key cannot be inserted into two different tables in parallel.

Patch 1 is actually a bug fix for the walker.

Patches 2-5 eliminate unnecessary out-of-line copies of jhash.

Patch 6 makes rhashtable_shrink shrink to fit.

Patch 7 introduces multiple rehashing. This means that if we decide to grow then we will grow regardless of whether the previous rehash has finished. However, this is still asynchronous, meaning that if insertions come fast enough we may still end up with a table that is overutilised.

Patch 8 adds support for GFP_ATOMIC allocations of struct bucket_table.

Finally, patch 9 enables immediate rehashing. This is done either when the table reaches 100% utilisation, or when the chain length exceeds 16 (the latter can be disabled on request, e.g., for nft_hash).

With these patches the system should no longer have any trouble dealing with fast insertions on a small table. In the worst case you end up with a list of tables that is log N in length while the rehasher catches up.

v3 restores rhashtable_shrink and fixes a number of bugs in the multiple rehashing patches (7 and 9).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents e167359 + ccd57b1 commit cc330b5
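
For orientation, the reader-side rule described above ("walk from the oldest table to the newest table") amounts to following each table's future_tbl pointer until no newer table exists. The sketch below illustrates that pattern using helpers visible in include/linux/rhashtable.h; it is a simplified illustration, not code from this commit: the function name lookup_walk_sketch is invented, it assumes no custom obj_cmpfn is configured, and the memory-ordering details of the real lookup path are omitted.

/* Simplified sketch: visit every live table from oldest to newest so
 * that an entry which has already been rehashed into a newer table is
 * not missed.  Caller must hold rcu_read_lock().
 */
static inline void *lookup_walk_sketch(struct rhashtable *ht, const void *key,
                                       const struct rhashtable_params params)
{
        struct rhashtable_compare_arg arg = {
                .ht = ht,
                .key = key,
        };
        struct bucket_table *tbl;
        struct rhash_head *he;
        unsigned hash;

        tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
        hash = rht_key_hashfn(ht, tbl, key, params);
        rht_for_each_rcu(he, tbl, hash) {
                if (!rhashtable_compare(&arg, rht_obj(ht, he)))
                        return rht_obj(ht, he); /* found in this table */
        }

        /* Not in this table; if a rehash produced a newer table, try it. */
        tbl = rht_dereference_rcu(tbl->future_tbl, ht);
        if (tbl)
                goto restart;

        return NULL;
}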

5 files changed: +255 -79 lines changed


include/linux/rhashtable.h

Lines changed: 80 additions & 21 deletions
@@ -19,6 +19,7 @@
 
 #include <linux/compiler.h>
 #include <linux/errno.h>
+#include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
@@ -102,8 +103,9 @@ struct rhashtable;
  * @max_size: Maximum size while expanding
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
+ * @insecure_elasticity: Set to true to disable chain length checks
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
- * @hashfn: Function to hash key
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
  * @obj_cmpfn: Function to compare key with object
  */
@@ -115,6 +117,7 @@ struct rhashtable_params {
         unsigned int            max_size;
         unsigned int            min_size;
         u32                     nulls_base;
+        bool                    insecure_elasticity;
         size_t                  locks_mul;
         rht_hashfn_t            hashfn;
         rht_obj_hashfn_t        obj_hashfn;
@@ -125,6 +128,8 @@ struct rhashtable_params {
  * struct rhashtable - Hash table handle
  * @tbl: Bucket table
  * @nelems: Number of elements in table
+ * @key_len: Key length for hashfn
+ * @elasticity: Maximum chain length before rehash
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
@@ -134,6 +139,8 @@ struct rhashtable {
         struct bucket_table __rcu       *tbl;
         atomic_t                        nelems;
         bool                            being_destroyed;
+        unsigned int                    key_len;
+        unsigned int                    elasticity;
         struct rhashtable_params        p;
         struct work_struct              run_work;
         struct mutex                    mutex;
@@ -199,9 +206,31 @@ static inline unsigned int rht_key_hashfn(
         struct rhashtable *ht, const struct bucket_table *tbl,
         const void *key, const struct rhashtable_params params)
 {
-        return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?:
-                                                   ht->p.key_len,
-                                                   tbl->hash_rnd));
+        unsigned hash;
+
+        /* params must be equal to ht->p if it isn't constant. */
+        if (!__builtin_constant_p(params.key_len))
+                hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
+        else if (params.key_len) {
+                unsigned key_len = params.key_len;
+
+                if (params.hashfn)
+                        hash = params.hashfn(key, key_len, tbl->hash_rnd);
+                else if (key_len & (sizeof(u32) - 1))
+                        hash = jhash(key, key_len, tbl->hash_rnd);
+                else
+                        hash = jhash2(key, key_len / sizeof(u32),
+                                      tbl->hash_rnd);
+        } else {
+                unsigned key_len = ht->p.key_len;
+
+                if (params.hashfn)
+                        hash = params.hashfn(key, key_len, tbl->hash_rnd);
+                else
+                        hash = jhash(key, key_len, tbl->hash_rnd);
+        }
+
+        return rht_bucket_index(tbl, hash);
 }
 
 static inline unsigned int rht_head_hashfn(
@@ -241,6 +270,17 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht,
                tbl->size > ht->p.min_size;
 }
 
+/**
+ * rht_grow_above_100 - returns true if nelems > table-size
+ * @ht:         hash table
+ * @tbl:        current table
+ */
+static inline bool rht_grow_above_100(const struct rhashtable *ht,
+                                      const struct bucket_table *tbl)
+{
+        return atomic_read(&ht->nelems) > tbl->size;
+}
+
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
@@ -282,9 +322,7 @@ int rhashtable_init(struct rhashtable *ht,
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
                            struct rhash_head *obj,
                            struct bucket_table *old_tbl);
-
-int rhashtable_expand(struct rhashtable *ht);
-int rhashtable_shrink(struct rhashtable *ht);
+int rhashtable_insert_rehash(struct rhashtable *ht);
 
 int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
@@ -507,43 +545,64 @@ static inline int __rhashtable_insert_fast(
                 .ht = ht,
                 .key = key,
         };
-        int err = -EEXIST;
         struct bucket_table *tbl, *new_tbl;
         struct rhash_head *head;
         spinlock_t *lock;
+        unsigned elasticity;
         unsigned hash;
+        int err;
 
+restart:
         rcu_read_lock();
 
         tbl = rht_dereference_rcu(ht->tbl, ht);
-        hash = rht_head_hashfn(ht, tbl, obj, params);
-        lock = rht_bucket_lock(tbl, hash);
 
-        spin_lock_bh(lock);
-
-        /* Because we have already taken the bucket lock in tbl,
-         * if we find that future_tbl is not yet visible then
-         * that guarantees all other insertions of the same entry
-         * will also grab the bucket lock in tbl because until
-         * the rehash completes ht->tbl won't be changed.
+        /* All insertions must grab the oldest table containing
+         * the hashed bucket that is yet to be rehashed.
          */
+        for (;;) {
+                hash = rht_head_hashfn(ht, tbl, obj, params);
+                lock = rht_bucket_lock(tbl, hash);
+                spin_lock_bh(lock);
+
+                if (tbl->rehash <= hash)
+                        break;
+
+                spin_unlock_bh(lock);
+                tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+        }
+
         new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
         if (unlikely(new_tbl)) {
                 err = rhashtable_insert_slow(ht, key, obj, new_tbl);
+                if (err == -EAGAIN)
+                        goto slow_path;
                 goto out;
         }
 
-        if (!key)
-                goto skip_lookup;
+        if (unlikely(rht_grow_above_100(ht, tbl))) {
+slow_path:
+                spin_unlock_bh(lock);
+                rcu_read_unlock();
+                err = rhashtable_insert_rehash(ht);
+                if (err)
+                        return err;
+
+                goto restart;
+        }
 
+        err = -EEXIST;
+        elasticity = ht->elasticity;
         rht_for_each(head, tbl, hash) {
-                if (unlikely(!(params.obj_cmpfn ?
+                if (key &&
+                    unlikely(!(params.obj_cmpfn ?
                                params.obj_cmpfn(&arg, rht_obj(ht, head)) :
                                rhashtable_compare(&arg, rht_obj(ht, head)))))
                         goto out;
+                if (!--elasticity)
+                        goto slow_path;
         }
 
-skip_lookup:
         err = 0;
 
         head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
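
A side note on the hashfn default documented in the hunk above: a caller whose key length is a multiple of sizeof(u32) can now leave .hashfn unset and the inlined fast path picks jhash2 (jhash for other lengths). A hypothetical configuration relying on that default might look like the following; demo_obj, demo_params and demo_table_init are made-up names for illustration and are not part of this commit.

#include <linux/rhashtable.h>

/* Hypothetical object keyed by a 16-byte, u32-aligned identifier. */
struct demo_obj {
        u32                     key[4];
        struct rhash_head       node;
};

static const struct rhashtable_params demo_params = {
        .head_offset = offsetof(struct demo_obj, node),
        .key_offset  = offsetof(struct demo_obj, key),
        .key_len     = sizeof(((struct demo_obj *)NULL)->key),
        /* .hashfn left NULL: key_len % 4 == 0, so the documented default
         * resolves to jhash2.  .insecure_elasticity is left false, so
         * over-long chains still trigger an immediate rehash.
         */
};

static int demo_table_init(struct rhashtable *ht)
{
        return rhashtable_init(ht, &demo_params);
}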
