
Commit 02c02bf

Author: Matthew Wilcox

xarray: Change definition of sibling entries

Instead of storing a pointer to the slot containing the canonical entry,
store the offset of the slot.  Produces slightly more efficient code
(~300 bytes) and simplifies the implementation.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>

1 parent 3159f94 commit 02c02bf
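
To illustrate the change, here is a sketch (not code from the patch; node and offset are assumed names for the example): where a multi-index entry spans several slots, the extra slots used to hold a tagged pointer back to the canonical slot, and now hold the canonical slot's offset encoded as an internal entry.

/* Illustrative sketch only. */

/* Before: a sibling slot stored a tagged pointer to the canonical slot. */
void *old_sibling = node_to_entry(&node->slots[offset]);

/* After: a sibling slot stores the canonical slot's offset, wrapped up
 * as an XArray internal entry. */
void *new_sibling = xa_mk_sibling(offset);

Because the offset is a small integer rather than an address inside the node, resolving a sibling no longer needs pointer arithmetic, which is where the simplification in the descend and replace paths below comes from.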

6 files changed, 121 insertions(+), 50 deletions(-)

include/linux/radix-tree.h

Lines changed: 1 addition & 4 deletions
@@ -59,10 +59,7 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 
 #define RADIX_TREE_MAX_TAGS	3
 
-#ifndef RADIX_TREE_MAP_SHIFT
-#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
-#endif
-
+#define RADIX_TREE_MAP_SHIFT	XA_CHUNK_SHIFT
 #define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
 #define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

include/linux/xarray.h

Lines changed: 92 additions & 0 deletions
@@ -22,6 +22,12 @@
  * x1: Value entry or tagged pointer
  *
  * Attempting to store internal entries in the XArray is a bug.
+ *
+ * Most internal entries are pointers to the next node in the tree.
+ * The following internal entries have a special meaning:
+ *
+ * 0-62: Sibling entries
+ * 256: Retry entry
  */
 
 #define BITS_PER_XA_VALUE	(BITS_PER_LONG - 1)
@@ -111,6 +117,42 @@ static inline unsigned int xa_pointer_tag(void *entry)
 	return (unsigned long)entry & 3UL;
 }
 
+/*
+ * xa_mk_internal() - Create an internal entry.
+ * @v: Value to turn into an internal entry.
+ *
+ * Context: Any context.
+ * Return: An XArray internal entry corresponding to this value.
+ */
+static inline void *xa_mk_internal(unsigned long v)
+{
+	return (void *)((v << 2) | 2);
+}
+
+/*
+ * xa_to_internal() - Extract the value from an internal entry.
+ * @entry: XArray entry.
+ *
+ * Context: Any context.
+ * Return: The value which was stored in the internal entry.
+ */
+static inline unsigned long xa_to_internal(const void *entry)
+{
+	return (unsigned long)entry >> 2;
+}
+
+/*
+ * xa_is_internal() - Is the entry an internal entry?
+ * @entry: XArray entry.
+ *
+ * Context: Any context.
+ * Return: %true if the entry is an internal entry.
+ */
+static inline bool xa_is_internal(const void *entry)
+{
+	return ((unsigned long)entry & 3) == 2;
+}
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -123,4 +165,54 @@ static inline unsigned int xa_pointer_tag(void *entry)
 #define xa_unlock_irqrestore(xa, flags) \
 				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
 
+/* Everything below here is the Advanced API. Proceed with caution. */
+
+/*
+ * The xarray is constructed out of a set of 'chunks' of pointers. Choosing
+ * the best chunk size requires some tradeoffs. A power of two recommends
+ * itself so that we can walk the tree based purely on shifts and masks.
+ * Generally, the larger the better; as the number of slots per level of the
+ * tree increases, the less tall the tree needs to be. But that needs to be
+ * balanced against the memory consumption of each node. On a 64-bit system,
+ * xa_node is currently 576 bytes, and we get 7 of them per 4kB page. If we
+ * doubled the number of slots per node, we'd get only 3 nodes per 4kB page.
+ */
+#ifndef XA_CHUNK_SHIFT
+#define XA_CHUNK_SHIFT		(CONFIG_BASE_SMALL ? 4 : 6)
+#endif
+#define XA_CHUNK_SIZE		(1UL << XA_CHUNK_SHIFT)
+#define XA_CHUNK_MASK		(XA_CHUNK_SIZE - 1)
+
+/* Private */
+static inline bool xa_is_node(const void *entry)
+{
+	return xa_is_internal(entry) && (unsigned long)entry > 4096;
+}
+
+/* Private */
+static inline void *xa_mk_sibling(unsigned int offset)
+{
+	return xa_mk_internal(offset);
+}
+
+/* Private */
+static inline unsigned long xa_to_sibling(const void *entry)
+{
+	return xa_to_internal(entry);
+}
+
+/**
+ * xa_is_sibling() - Is the entry a sibling entry?
+ * @entry: Entry retrieved from the XArray
+ *
+ * Return: %true if the entry is a sibling entry.
+ */
+static inline bool xa_is_sibling(const void *entry)
+{
+	return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) &&
+		(entry < xa_mk_sibling(XA_CHUNK_SIZE - 1));
+}
+
+#define XA_RETRY_ENTRY		xa_mk_internal(256)
+
 #endif /* _LINUX_XARRAY_H */
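
A quick sketch of how this encoding behaves (illustrative only; the values follow from xa_mk_internal() shifting the value left by two and setting bit 1):

/* xa_mk_internal(v) stores v as (v << 2) | 2, so low bits of 10 mark
 * an internal entry. */
void *sib = xa_mk_sibling(5);		/* (5 << 2) | 2 == 0x16 */

BUG_ON(!xa_is_internal(sib));		/* low two bits are 10 */
BUG_ON(xa_to_sibling(sib) != 5);	/* decodes back to the offset */
BUG_ON(xa_is_node(sib));		/* 0x16 < 4096, so not a node pointer */

Sibling offsets 0-62 therefore encode to the small pointer values 0x2 through 0xfa, and XA_RETRY_ENTRY to 0x402, which is why xa_is_node() can treat any internal entry above 4096 as a pointer to the next node.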

lib/Kconfig

Lines changed: 7 additions & 0 deletions
@@ -399,8 +399,15 @@ config INTERVAL_TREE
 
 	  for more information.
 
+config XARRAY_MULTI
+	bool
+	help
+	  Support entries which occupy multiple consecutive indices in the
+	  XArray.
+
 config RADIX_TREE_MULTIORDER
 	bool
+	select XARRAY_MULTI
 
 config ASSOCIATIVE_ARRAY
 	bool

lib/radix-tree.c

Lines changed: 19 additions & 45 deletions
@@ -38,6 +38,7 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/xarray.h>
 
 
 /* Number of nodes in fully populated tree of given height */
@@ -98,24 +99,7 @@ static inline void *node_to_entry(void *ptr)
 	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
 }
 
-#define RADIX_TREE_RETRY	node_to_entry(NULL)
-
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/* Sibling slots point directly to another slot in the same node */
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
-	void __rcu **ptr = node;
-	return (parent->slots <= ptr) &&
-			(ptr < parent->slots + RADIX_TREE_MAP_SIZE);
-}
-#else
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
-	return false;
-}
-#endif
+#define RADIX_TREE_RETRY	XA_RETRY_ENTRY
 
 static inline unsigned long
 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
@@ -129,16 +113,10 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
 	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	if (radix_tree_is_internal_node(entry)) {
-		if (is_sibling_entry(parent, entry)) {
-			void __rcu **sibentry;
-			sibentry = (void __rcu **) entry_to_node(entry);
-			offset = get_slot_offset(parent, sibentry);
-			entry = rcu_dereference_raw(*sibentry);
-		}
+	if (xa_is_sibling(entry)) {
+		offset = xa_to_sibling(entry);
+		entry = rcu_dereference_raw(parent->slots[offset]);
 	}
-#endif
 
 	*nodep = (void *)entry;
 	return offset;
@@ -300,10 +278,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 		} else if (!radix_tree_is_internal_node(entry)) {
 			pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
 					entry, i, first, last, node);
-		} else if (is_sibling_entry(node, entry)) {
+		} else if (xa_is_sibling(entry)) {
 			pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
 					entry, i, first, last, node,
-					*(void **)entry_to_node(entry));
+					node->slots[xa_to_sibling(entry)]);
 		} else {
 			dump_node(entry_to_node(entry), first);
 		}
@@ -881,8 +859,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 
 	for (;;) {
 		void *entry = rcu_dereference_raw(child->slots[offset]);
-		if (radix_tree_is_internal_node(entry) && child->shift &&
-				!is_sibling_entry(child, entry)) {
+		if (xa_is_node(entry) && child->shift) {
 			child = entry_to_node(entry);
 			offset = 0;
 			continue;
@@ -904,7 +881,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 static inline int insert_entries(struct radix_tree_node *node,
 		void __rcu **slot, void *item, unsigned order, bool replace)
 {
-	struct radix_tree_node *child;
+	void *sibling;
 	unsigned i, n, tag, offset, tags = 0;
 
 	if (node) {
@@ -922,7 +899,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 		offset = offset & ~(n - 1);
 		slot = &node->slots[offset];
 	}
-	child = node_to_entry(slot);
+	sibling = xa_mk_sibling(offset);
 
 	for (i = 0; i < n; i++) {
 		if (slot[i]) {
@@ -939,7 +916,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 	for (i = 0; i < n; i++) {
 		struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
 		if (i) {
-			rcu_assign_pointer(slot[i], child);
+			rcu_assign_pointer(slot[i], sibling);
 			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 				if (tags & (1 << tag))
 					tag_clear(node, tag, offset + i);
@@ -949,9 +926,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 				if (tags & (1 << tag))
 					tag_set(node, tag, offset);
 		}
-		if (radix_tree_is_internal_node(old) &&
-				!is_sibling_entry(node, old) &&
-				(old != RADIX_TREE_RETRY))
+		if (xa_is_node(old))
 			radix_tree_free_nodes(old);
 		if (xa_is_value(old))
 			node->exceptional--;
@@ -1112,18 +1087,17 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
 				void __rcu **slot, int count, int exceptional)
 {
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-	void *ptr = node_to_entry(slot);
-	unsigned offset = get_slot_offset(node, slot) + 1;
+	unsigned offset = get_slot_offset(node, slot);
+	void *ptr = xa_mk_sibling(offset);
 
-	while (offset < RADIX_TREE_MAP_SIZE) {
+	while (++offset < RADIX_TREE_MAP_SIZE) {
 		if (rcu_dereference_raw(node->slots[offset]) != ptr)
 			break;
 		if (count < 0) {
 			node->slots[offset] = NULL;
 			node->count--;
 		}
 		node->exceptional += exceptional;
-		offset++;
 	}
 #endif
 }
@@ -1319,8 +1293,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 			tags |= 1 << tag;
 
 	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
-		if (!is_sibling_entry(parent,
-				rcu_dereference_raw(parent->slots[end])))
+		if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end])))
 			break;
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 			if (tags & (1 << tag))
@@ -1618,7 +1591,7 @@ static void __rcu **skip_siblings(struct radix_tree_node **nodep,
 {
 	while (iter->index < iter->next_index) {
 		*nodep = rcu_dereference_raw(*slot);
-		if (*nodep && !is_sibling_entry(iter->node, *nodep))
+		if (*nodep && !xa_is_sibling(*nodep))
 			return slot;
 		slot++;
 		iter->index = __radix_tree_iter_add(iter, 1);
@@ -1769,7 +1742,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 			while (++offset < RADIX_TREE_MAP_SIZE) {
 				void *slot = rcu_dereference_raw(
 						node->slots[offset]);
-				if (is_sibling_entry(node, slot))
+				if (xa_is_sibling(slot))
 					continue;
 				if (slot)
 					break;
@@ -2283,6 +2256,7 @@ void __init radix_tree_init(void)
 
 	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
 	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
+	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
 	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
 			sizeof(struct radix_tree_node), 0,
 			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
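
To make the new layout concrete, here is an illustrative sketch (not part of the patch; node and item are assumed variables) of what insert_entries() now leaves behind for an order-2 entry stored at offset 4 of a 64-slot node, with CONFIG_XARRAY_MULTI enabled:

/* Canonical entry in the first of the four covered slots... */
rcu_assign_pointer(node->slots[4], item);
/* ...and offset-encoded sibling entries in the remaining three. */
rcu_assign_pointer(node->slots[5], xa_mk_sibling(4));	/* 0x12 */
rcu_assign_pointer(node->slots[6], xa_mk_sibling(4));
rcu_assign_pointer(node->slots[7], xa_mk_sibling(4));

/* A lookup landing on slots[5..7] now recovers the canonical slot by
 * decoding the offset, instead of converting a stored slot pointer
 * back with get_slot_offset(). */
unsigned int offset = xa_to_sibling(rcu_dereference_raw(node->slots[6]));
void *entry = rcu_dereference_raw(node->slots[offset]);	/* == item */

Note the BUILD_BUG_ON(XA_CHUNK_SIZE > 255) added in radix_tree_init(), which keeps every possible sibling offset well below the 256 used for XA_RETRY_ENTRY.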

tools/testing/radix-tree/Makefile

Lines changed: 1 addition & 1 deletion
@@ -46,6 +46,6 @@ idr.c: ../../../lib/idr.c
 
 generated/map-shift.h:
 	@if ! grep -qws $(SHIFT) generated/map-shift.h; then \
-		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \
+		echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \
 		generated/map-shift.h; \
 	fi
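
As a usage note (assuming the test harness is still driven through the same SHIFT variable this rule already uses, for example make SHIFT=3), generated/map-shift.h now ends up containing "#define XA_CHUNK_SHIFT 3" rather than a RADIX_TREE_MAP_SHIFT define; radix-tree.h picks the value up through its new RADIX_TREE_MAP_SHIFT alias, and xarray.h only falls back to its CONFIG_BASE_SMALL default when XA_CHUNK_SHIFT is not already defined.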
Lines changed: 1 addition & 0 deletions
@@ -1 +1,2 @@
 #define CONFIG_RADIX_TREE_MULTIORDER 1
+#define CONFIG_XARRAY_MULTI 1
