@@ -394,8 +394,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
394
394
{
395
395
struct lpm_trie * trie = container_of (map , struct lpm_trie , map );
396
396
struct bpf_lpm_trie_key * key = _key ;
397
- struct lpm_trie_node __rcu * * trim ;
398
- struct lpm_trie_node * node ;
397
+ struct lpm_trie_node __rcu * * trim , * * trim2 ;
398
+ struct lpm_trie_node * node , * parent ;
399
399
unsigned long irq_flags ;
400
400
unsigned int next_bit ;
401
401
size_t matchlen = 0 ;
@@ -407,31 +407,26 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
407
407
raw_spin_lock_irqsave (& trie -> lock , irq_flags );
408
408
409
409
/* Walk the tree looking for an exact key/length match and keeping
410
- * track of where we could begin trimming the tree. The trim-point
411
- * is the sub-tree along the walk consisting of only single-child
412
- * intermediate nodes and ending at a leaf node that we want to
413
- * remove .
410
+ * track of the path we traverse. We will need to know the node
411
+ * we wish to delete, and the slot that points to the node we want
412
+ * to delete. We may also need to know the nodes parent and the
413
+ * slot that contains it .
414
414
*/
415
415
trim = & trie -> root ;
416
- node = rcu_dereference_protected (
417
- trie -> root , lockdep_is_held (& trie -> lock ));
418
- while (node ) {
416
+ trim2 = trim ;
417
+ parent = NULL ;
418
+ while ((node = rcu_dereference_protected (
419
+ * trim , lockdep_is_held (& trie -> lock )))) {
419
420
matchlen = longest_prefix_match (trie , node , key );
420
421
421
422
if (node -> prefixlen != matchlen ||
422
423
node -> prefixlen == key -> prefixlen )
423
424
break ;
424
425
426
+ parent = node ;
427
+ trim2 = trim ;
425
428
next_bit = extract_bit (key -> data , node -> prefixlen );
426
- /* If we hit a node that has more than one child or is a valid
427
- * prefix itself, do not remove it. Reset the root of the trim
428
- * path to its descendant on our path.
429
- */
430
- if (!(node -> flags & LPM_TREE_NODE_FLAG_IM ) ||
431
- (node -> child [0 ] && node -> child [1 ]))
432
- trim = & node -> child [next_bit ];
433
- node = rcu_dereference_protected (
434
- node -> child [next_bit ], lockdep_is_held (& trie -> lock ));
429
+ trim = & node -> child [next_bit ];
435
430
}
436
431
437
432
if (!node || node -> prefixlen != key -> prefixlen ||
@@ -442,27 +437,47 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
442
437
443
438
trie -> n_entries -- ;
444
439
445
- /* If the node we are removing is not a leaf node , simply mark it
440
+ /* If the node we are removing has two children , simply mark it
446
441
* as intermediate and we are done.
447
442
*/
448
- if (rcu_access_pointer (node -> child [0 ]) ||
443
+ if (rcu_access_pointer (node -> child [0 ]) &&
449
444
rcu_access_pointer (node -> child [1 ])) {
450
445
node -> flags |= LPM_TREE_NODE_FLAG_IM ;
451
446
goto out ;
452
447
}
453
448
454
- /* trim should now point to the slot holding the start of a path from
455
- * zero or more intermediate nodes to our leaf node for deletion.
449
+ /* If the parent of the node we are about to delete is an intermediate
450
+ * node, and the deleted node doesn't have any children, we can delete
451
+ * the intermediate parent as well and promote its other child
452
+ * up the tree. Doing this maintains the invariant that all
453
+ * intermediate nodes have exactly 2 children and that there are no
454
+ * unnecessary intermediate nodes in the tree.
456
455
*/
457
- while ((node = rcu_dereference_protected (
458
- * trim , lockdep_is_held (& trie -> lock )))) {
459
- RCU_INIT_POINTER (* trim , NULL );
460
- trim = rcu_access_pointer (node -> child [0 ]) ?
461
- & node -> child [0 ] :
462
- & node -> child [1 ];
456
+ if (parent && (parent -> flags & LPM_TREE_NODE_FLAG_IM ) &&
457
+ !node -> child [0 ] && !node -> child [1 ]) {
458
+ if (node == rcu_access_pointer (parent -> child [0 ]))
459
+ rcu_assign_pointer (
460
+ * trim2 , rcu_access_pointer (parent -> child [1 ]));
461
+ else
462
+ rcu_assign_pointer (
463
+ * trim2 , rcu_access_pointer (parent -> child [0 ]));
464
+ kfree_rcu (parent , rcu );
463
465
kfree_rcu (node , rcu );
466
+ goto out ;
464
467
}
465
468
469
+ /* The node we are removing has either zero or one child. If there
470
+ * is a child, move it into the removed node's slot then delete
471
+ * the node. Otherwise just clear the slot and delete the node.
472
+ */
473
+ if (node -> child [0 ])
474
+ rcu_assign_pointer (* trim , rcu_access_pointer (node -> child [0 ]));
475
+ else if (node -> child [1 ])
476
+ rcu_assign_pointer (* trim , rcu_access_pointer (node -> child [1 ]));
477
+ else
478
+ RCU_INIT_POINTER (* trim , NULL );
479
+ kfree_rcu (node , rcu );
480
+
466
481
out :
467
482
raw_spin_unlock_irqrestore (& trie -> lock , irq_flags );
468
483
0 commit comments