Skip to content

Commit b5d7388

Browse files
kraigatgoogdavem330
authored andcommitted
bpf: Optimize lpm trie delete
Before the delete operator was added, this datastructure maintained an invariant that intermediate nodes were only present when necessary to build the tree. This patch updates the delete operation to reinstate that invariant by removing unnecessary intermediate nodes after a node is removed and thus keeping the tree structure at a minimal size. Suggested-by: Daniel Mack <daniel@zonque.org> Signed-off-by: Craig Gallek <kraig@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent d835b63 commit b5d7388

File tree

1 file changed

+43
-28
lines changed

1 file changed

+43
-28
lines changed

kernel/bpf/lpm_trie.c

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
394394
{
395395
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
396396
struct bpf_lpm_trie_key *key = _key;
397-
struct lpm_trie_node __rcu **trim;
398-
struct lpm_trie_node *node;
397+
struct lpm_trie_node __rcu **trim, **trim2;
398+
struct lpm_trie_node *node, *parent;
399399
unsigned long irq_flags;
400400
unsigned int next_bit;
401401
size_t matchlen = 0;
@@ -407,31 +407,26 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
407407
raw_spin_lock_irqsave(&trie->lock, irq_flags);
408408

409409
/* Walk the tree looking for an exact key/length match and keeping
410-
* track of where we could begin trimming the tree. The trim-point
411-
* is the sub-tree along the walk consisting of only single-child
412-
* intermediate nodes and ending at a leaf node that we want to
413-
* remove.
410+
* track of the path we traverse. We will need to know the node
411+
* we wish to delete, and the slot that points to the node we want
412+
* to delete. We may also need to know the nodes parent and the
413+
* slot that contains it.
414414
*/
415415
trim = &trie->root;
416-
node = rcu_dereference_protected(
417-
trie->root, lockdep_is_held(&trie->lock));
418-
while (node) {
416+
trim2 = trim;
417+
parent = NULL;
418+
while ((node = rcu_dereference_protected(
419+
*trim, lockdep_is_held(&trie->lock)))) {
419420
matchlen = longest_prefix_match(trie, node, key);
420421

421422
if (node->prefixlen != matchlen ||
422423
node->prefixlen == key->prefixlen)
423424
break;
424425

426+
parent = node;
427+
trim2 = trim;
425428
next_bit = extract_bit(key->data, node->prefixlen);
426-
/* If we hit a node that has more than one child or is a valid
427-
* prefix itself, do not remove it. Reset the root of the trim
428-
* path to its descendant on our path.
429-
*/
430-
if (!(node->flags & LPM_TREE_NODE_FLAG_IM) ||
431-
(node->child[0] && node->child[1]))
432-
trim = &node->child[next_bit];
433-
node = rcu_dereference_protected(
434-
node->child[next_bit], lockdep_is_held(&trie->lock));
429+
trim = &node->child[next_bit];
435430
}
436431

437432
if (!node || node->prefixlen != key->prefixlen ||
@@ -442,27 +437,47 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
442437

443438
trie->n_entries--;
444439

445-
/* If the node we are removing is not a leaf node, simply mark it
440+
/* If the node we are removing has two children, simply mark it
446441
* as intermediate and we are done.
447442
*/
448-
if (rcu_access_pointer(node->child[0]) ||
443+
if (rcu_access_pointer(node->child[0]) &&
449444
rcu_access_pointer(node->child[1])) {
450445
node->flags |= LPM_TREE_NODE_FLAG_IM;
451446
goto out;
452447
}
453448

454-
/* trim should now point to the slot holding the start of a path from
455-
* zero or more intermediate nodes to our leaf node for deletion.
449+
/* If the parent of the node we are about to delete is an intermediate
450+
* node, and the deleted node doesn't have any children, we can delete
451+
* the intermediate parent as well and promote its other child
452+
* up the tree. Doing this maintains the invariant that all
453+
* intermediate nodes have exactly 2 children and that there are no
454+
* unnecessary intermediate nodes in the tree.
456455
*/
457-
while ((node = rcu_dereference_protected(
458-
*trim, lockdep_is_held(&trie->lock)))) {
459-
RCU_INIT_POINTER(*trim, NULL);
460-
trim = rcu_access_pointer(node->child[0]) ?
461-
&node->child[0] :
462-
&node->child[1];
456+
if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) &&
457+
!node->child[0] && !node->child[1]) {
458+
if (node == rcu_access_pointer(parent->child[0]))
459+
rcu_assign_pointer(
460+
*trim2, rcu_access_pointer(parent->child[1]));
461+
else
462+
rcu_assign_pointer(
463+
*trim2, rcu_access_pointer(parent->child[0]));
464+
kfree_rcu(parent, rcu);
463465
kfree_rcu(node, rcu);
466+
goto out;
464467
}
465468

469+
/* The node we are removing has either zero or one child. If there
470+
* is a child, move it into the removed node's slot then delete
471+
* the node. Otherwise just clear the slot and delete the node.
472+
*/
473+
if (node->child[0])
474+
rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0]));
475+
else if (node->child[1])
476+
rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
477+
else
478+
RCU_INIT_POINTER(*trim, NULL);
479+
kfree_rcu(node, rcu);
480+
466481
out:
467482
raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
468483

0 commit comments

Comments
 (0)