Skip to content

Commit a7c910e

Browse files
committed
net/art: implement the Table type, a multi-level art route table.
Updates tailscale#7781 │ sec/op │ TableInsertion/ipv4/10 1.562µ ± 2% TableInsertion/ipv4/100 2.398µ ± 5% TableInsertion/ipv4/1000 2.097µ ± 3% TableInsertion/ipv4/10000 2.756µ ± 4% TableInsertion/ipv4/100000 2.473µ ± 13% TableInsertion/ipv6/10 7.649µ ± 2% TableInsertion/ipv6/100 12.09µ ± 3% TableInsertion/ipv6/1000 14.84µ ± 5% TableInsertion/ipv6/10000 14.72µ ± 8% TableInsertion/ipv6/100000 13.23µ ± 41% TableDelete/ipv4/10 378.4n ± 5% TableDelete/ipv4/100 366.9n ± 3% TableDelete/ipv4/1000 418.6n ± 3% TableDelete/ipv4/10000 609.2n ± 11% TableDelete/ipv4/100000 679.2n ± 28% TableDelete/ipv6/10 504.2n ± 4% TableDelete/ipv6/100 959.5n ± 12% TableDelete/ipv6/1000 1.436µ ± 6% TableDelete/ipv6/10000 1.772µ ± 15% TableDelete/ipv6/100000 1.172µ ± 113% TableGet/ipv4/10 32.14n ± 11% TableGet/ipv4/100 38.58n ± 2% TableGet/ipv4/1000 45.03n ± 2% TableGet/ipv4/10000 52.90n ± 7% TableGet/ipv4/100000 135.2n ± 11% TableGet/ipv6/10 41.55n ± 1% TableGet/ipv6/100 44.78n ± 2% TableGet/ipv6/1000 49.03n ± 2% TableGet/ipv6/10000 65.38n ± 5% TableGet/ipv6/100000 525.0n ± 39% │ avg-B/op │ TableInsertion/ipv4/10 25.18Ki ± 0% TableInsertion/ipv4/100 17.63Ki ± 0% TableInsertion/ipv4/1000 14.14Ki ± 0% TableInsertion/ipv4/10000 12.92Ki ± 0% TableInsertion/ipv4/100000 11.13Ki ± 0% TableInsertion/ipv6/10 76.87Ki ± 0% TableInsertion/ipv6/100 98.33Ki ± 0% TableInsertion/ipv6/1000 91.44Ki ± 0% TableInsertion/ipv6/10000 90.39Ki ± 0% TableInsertion/ipv6/100000 87.19Ki ± 0% TableDelete/ipv4/10 3.230 ± 0% TableDelete/ipv4/100 4.020 ± 0% TableDelete/ipv4/1000 3.990 ± 0% TableDelete/ipv4/10000 4.000 ± 0% TableDelete/ipv4/100000 4.000 ± 0% TableDelete/ipv6/10 16.00 ± 0% TableDelete/ipv6/100 16.00 ± 0% TableDelete/ipv6/1000 16.00 ± 0% TableDelete/ipv6/10000 16.00 ± 0% TableDelete/ipv6/100000 16.00 ± 0% │ avg-allocs/op │ TableInsertion/ipv4/10 2.900 ± 0% TableInsertion/ipv4/100 2.330 ± 0% TableInsertion/ipv4/1000 2.070 ± 0% TableInsertion/ipv4/10000 1.980 ± 0% TableInsertion/ipv4/100000 1.840 ± 0% TableInsertion/ipv6/10 6.800 ± 0% TableInsertion/ipv6/100 8.420 ± 0% TableInsertion/ipv6/1000 7.900 ± 0% TableInsertion/ipv6/10000 7.820 ± 0% TableInsertion/ipv6/100000 7.580 ± 0% TableDelete/ipv4/10 1.000 ± 0% TableDelete/ipv4/100 1.000 ± 0% TableDelete/ipv4/1000 1.000 ± 0% TableDelete/ipv4/10000 1.000 ± 0% TableDelete/ipv4/100000 1.000 ± 0% TableDelete/ipv6/10 1.000 ± 0% TableDelete/ipv6/100 1.000 ± 0% TableDelete/ipv6/1000 1.000 ± 0% TableDelete/ipv6/10000 1.000 ± 0% TableDelete/ipv6/100000 1.000 ± 0% │ routes/s │ TableInsertion/ipv4/10 640.3k ± 2% TableInsertion/ipv4/100 417.1k ± 5% TableInsertion/ipv4/1000 477.0k ± 3% TableInsertion/ipv4/10000 362.8k ± 5% TableInsertion/ipv4/100000 404.5k ± 15% TableInsertion/ipv6/10 130.7k ± 1% TableInsertion/ipv6/100 82.69k ± 3% TableInsertion/ipv6/1000 67.37k ± 5% TableInsertion/ipv6/10000 67.93k ± 9% TableInsertion/ipv6/100000 75.63k ± 29% TableDelete/ipv4/10 2.642M ± 6% TableDelete/ipv4/100 2.726M ± 3% TableDelete/ipv4/1000 2.389M ± 3% TableDelete/ipv4/10000 1.641M ± 12% TableDelete/ipv4/100000 1.472M ± 27% TableDelete/ipv6/10 1.984M ± 4% TableDelete/ipv6/100 1.042M ± 11% TableDelete/ipv6/1000 696.5k ± 6% TableDelete/ipv6/10000 564.4k ± 13% TableDelete/ipv6/100000 853.6k ± 53% │ addrs/s │ TableGet/ipv4/10 31.11M ± 10% TableGet/ipv4/100 25.92M ± 2% TableGet/ipv4/1000 22.21M ± 2% TableGet/ipv4/10000 18.91M ± 8% TableGet/ipv4/100000 7.397M ± 12% TableGet/ipv6/10 24.07M ± 1% TableGet/ipv6/100 22.33M ± 2% TableGet/ipv6/1000 20.40M ± 2% TableGet/ipv6/10000 15.30M ± 5% TableGet/ipv6/100000 1.905M ± 28% │ B/op │ TableGet/ipv4/10 4.000 ± 0% TableGet/ipv4/100 4.000 ± 0% TableGet/ipv4/1000 4.000 ± 0% TableGet/ipv4/10000 4.000 ± 0% TableGet/ipv4/100000 4.000 ± 0% TableGet/ipv6/10 16.00 ± 0% TableGet/ipv6/100 16.00 ± 0% TableGet/ipv6/1000 16.00 ± 0% TableGet/ipv6/10000 16.00 ± 0% TableGet/ipv6/100000 16.00 ± 0% │ allocs/op │ TableGet/ipv4/10 1.000 ± 0% TableGet/ipv4/100 1.000 ± 0% TableGet/ipv4/1000 1.000 ± 0% TableGet/ipv4/10000 1.000 ± 0% TableGet/ipv4/100000 1.000 ± 0% TableGet/ipv6/10 1.000 ± 0% TableGet/ipv6/100 1.000 ± 0% TableGet/ipv6/1000 1.000 ± 0% TableGet/ipv6/10000 1.000 ± 0% TableGet/ipv6/100000 1.000 ± 0% Signed-off-by: David Anderson <danderson@tailscale.com>
1 parent edb02b6 commit a7c910e

File tree

4 files changed

+708
-6
lines changed

4 files changed

+708
-6
lines changed

net/art/stride_table.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ func (t *strideTable[T]) getOrCreateChild(addr uint8) *strideTable[T] {
8282
return t.entries[idx].child
8383
}
8484

85+
func (t *strideTable[T]) getValAndChild(addr uint8) (*T, *strideTable[T]) {
86+
idx := hostIndex(addr)
87+
return t.entries[idx].value, t.entries[idx].child
88+
}
89+
8590
// allot updates entries whose stored prefixIndex matches oldPrefixIndex, in the
8691
// subtree rooted at idx. Matching entries have their stored prefixIndex set to
8792
// newPrefixIndex, and their value set to val.

net/art/stride_table_test.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
)
1717

1818
func TestInversePrefix(t *testing.T) {
19+
t.Parallel()
1920
for i := 0; i < 256; i++ {
2021
for len := 0; len < 9; len++ {
2122
addr := i & (0xFF << (8 - len))
@@ -29,6 +30,7 @@ func TestInversePrefix(t *testing.T) {
2930
}
3031

3132
func TestHostIndex(t *testing.T) {
33+
t.Parallel()
3234
for i := 0; i < 256; i++ {
3335
got := hostIndex(uint8(i))
3436
want := prefixIndex(uint8(i), 8)
@@ -39,6 +41,7 @@ func TestHostIndex(t *testing.T) {
3941
}
4042

4143
func TestStrideTableInsert(t *testing.T) {
44+
t.Parallel()
4245
// Verify that strideTable's lookup results after a bunch of inserts exactly
4346
// match those of a naive implementation that just scans all prefixes on
4447
// every lookup. The naive implementation is very slow, but its behavior is
@@ -66,6 +69,7 @@ func TestStrideTableInsert(t *testing.T) {
6669
}
6770

6871
func TestStrideTableInsertShuffled(t *testing.T) {
72+
t.Parallel()
6973
// The order in which routes are inserted into a route table does not
7074
// influence the final shape of the table, as long as the same set of
7175
// prefixes is being inserted. This test verifies that strideTable behaves
@@ -111,6 +115,7 @@ func TestStrideTableInsertShuffled(t *testing.T) {
111115
}
112116

113117
func TestStrideTableDelete(t *testing.T) {
118+
t.Parallel()
114119
// Compare route deletion to our reference slowTable.
115120
pfxs := shufflePrefixes(allPrefixes())[:100]
116121
slow := slowTable[int]{pfxs}
@@ -145,6 +150,7 @@ func TestStrideTableDelete(t *testing.T) {
145150
}
146151

147152
func TestStrideTableDeleteShuffle(t *testing.T) {
153+
t.Parallel()
148154
// Same as TestStrideTableInsertShuffle, the order in which prefixes are
149155
// deleted should not impact the final shape of the route table.
150156

@@ -191,17 +197,17 @@ func TestStrideTableDeleteShuffle(t *testing.T) {
191197
}
192198
}
193199

194-
var benchRouteCount = []int{10, 50, 100, 200}
200+
var strideRouteCount = []int{10, 50, 100, 200}
195201

196202
// forCountAndOrdering runs the benchmark fn with different sets of routes.
197203
//
198204
// fn is called once for each combination of {num_routes, order}, where
199-
// num_routes is the values in benchRouteCount, and order is the order of the
205+
// num_routes is the values in strideRouteCount, and order is the order of the
200206
// routes in the list: random, largest prefix first (/0 to /8), and smallest
201207
// prefix first (/8 to /0).
202-
func forCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[int])) {
208+
func forStrideCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[int])) {
203209
routes := shufflePrefixes(allPrefixes())
204-
for _, nroutes := range benchRouteCount {
210+
for _, nroutes := range strideRouteCount {
205211
b.Run(fmt.Sprint(nroutes), func(b *testing.B) {
206212
routes := append([]slowEntry[int](nil), routes[:nroutes]...)
207213
b.Run("random_order", func(b *testing.B) {
@@ -233,7 +239,7 @@ func forCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[
233239
}
234240

235241
func BenchmarkStrideTableInsertion(b *testing.B) {
236-
forCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
242+
forStrideCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
237243
val := 0
238244
for i := 0; i < b.N; i++ {
239245
var rt strideTable[int]
@@ -250,7 +256,7 @@ func BenchmarkStrideTableInsertion(b *testing.B) {
250256
}
251257

252258
func BenchmarkStrideTableDeletion(b *testing.B) {
253-
forCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
259+
forStrideCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
254260
val := 0
255261
var rt strideTable[int]
256262
for _, route := range routes {

net/art/table.go

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,152 @@
1111
// For more information, see Yoichi Hariguchi's paper:
1212
// https://cseweb.ucsd.edu//~varghese/TEACH/cs228/artlookup.pdf
1313
package art
14+
15+
import (
16+
"bytes"
17+
"fmt"
18+
"io"
19+
"net/netip"
20+
"strings"
21+
)
22+
23+
// Table is an IPv4 and IPv6 routing table.
24+
type Table[T any] struct {
25+
v4 strideTable[T]
26+
v6 strideTable[T]
27+
}
28+
29+
// Get does a route lookup for addr and returns the associated value, or nil if
30+
// no route matched.
31+
func (t *Table[T]) Get(addr netip.Addr) *T {
32+
st := &t.v4
33+
if addr.Is6() {
34+
st = &t.v6
35+
}
36+
37+
var ret *T
38+
for _, stride := range addr.AsSlice() {
39+
rt, child := st.getValAndChild(stride)
40+
if rt != nil {
41+
// Found a more specific route than whatever we found previously,
42+
// keep a note.
43+
ret = rt
44+
}
45+
if child == nil {
46+
// No sub-routes further down, whatever we have recorded in ret is
47+
// the result.
48+
return ret
49+
}
50+
st = child
51+
}
52+
53+
// Unreachable because Insert/Delete won't allow the leaf strideTables to
54+
// have children, so we must return via the nil check in the loop.
55+
panic("unreachable")
56+
}
57+
58+
// Insert adds pfx to the table, with value val.
59+
// If pfx is already present in the table, its value is set to val.
60+
func (t *Table[T]) Insert(pfx netip.Prefix, val *T) {
61+
if val == nil {
62+
panic("Table.Insert called with nil value")
63+
}
64+
st := &t.v4
65+
if pfx.Addr().Is6() {
66+
st = &t.v6
67+
}
68+
bs := pfx.Addr().AsSlice()
69+
i := 0
70+
numBits := pfx.Bits()
71+
72+
// The strideTable we want to insert into is potentially at the end of a
73+
// chain of parent tables, each one encoding successive 8 bits of the
74+
// prefix. Navigate downwards, allocating child tables as needed, until we
75+
// find the one this prefix belongs in.
76+
for numBits > 8 {
77+
st = st.getOrCreateChild(bs[i])
78+
i++
79+
numBits -= 8
80+
}
81+
// Finally, insert the remaining 0-8 bits of the prefix into the child
82+
// table.
83+
st.insert(bs[i], numBits, val)
84+
}
85+
86+
// Delete removes pfx from the table, if it is present.
87+
func (t *Table[T]) Delete(pfx netip.Prefix) {
88+
st := &t.v4
89+
if pfx.Addr().Is6() {
90+
st = &t.v6
91+
}
92+
bs := pfx.Addr().AsSlice()
93+
i := 0
94+
numBits := pfx.Bits()
95+
96+
// Deletion may drive the refcount of some strideTables down to zero. We
97+
// need to clean up these dangling tables, so we have to keep track of which
98+
// tables we touch on the way down, and which strideEntry index each child
99+
// is registered in.
100+
strideTables := [16]*strideTable[T]{st}
101+
var strideIndexes [16]int
102+
103+
// Similar to Insert, navigate down the tree of strideTables, looking for
104+
// the one that houses the last 0-8 bits of the prefix to delete.
105+
//
106+
// The only difference is that here, we don't create missing child tables.
107+
// If a child necessary to pfx is missing, then the pfx cannot exist in the
108+
// Table, and we can exit early.
109+
for numBits > 8 {
110+
child, idx := st.getChild(bs[i])
111+
if child == nil {
112+
// Prefix can't exist in the table, one of the necessary
113+
// strideTables doesn't exit.
114+
return
115+
}
116+
// Note that the strideIndex and strideTables entries are off-by-one.
117+
// The child table pointer is recorded at i+1, but it is referenced by a
118+
// particular index in the parent table, at index i.
119+
strideIndexes[i] = idx
120+
i++
121+
strideTables[i] = child
122+
numBits -= 8
123+
st = child
124+
}
125+
if st.delete(bs[i], numBits) == nil {
126+
// Prefix didn't exist in the expected strideTable, refcount hasn't
127+
// changed, no need to run through cleanup.
128+
return
129+
}
130+
131+
// st.delete reduced st's refcount by one, so we may be hanging onto a chain
132+
// of redundant strideTables. Walk back up the path we recorded in the
133+
// descent loop, deleting tables until we encounter one that still has other
134+
// refs (or we hit the root strideTable, which is never deleted).
135+
for i > 0 && strideTables[i].refs == 0 {
136+
strideTables[i-1].deleteChild(strideIndexes[i-1])
137+
i--
138+
}
139+
}
140+
141+
// debugSummary prints the tree of allocated strideTables in t, with each
142+
// strideTable's refcount.
143+
func (t *Table[T]) debugSummary() string {
144+
var ret bytes.Buffer
145+
fmt.Fprintf(&ret, "v4: ")
146+
strideSummary(&ret, &t.v4, 0)
147+
fmt.Fprintf(&ret, "v6: ")
148+
strideSummary(&ret, &t.v6, 0)
149+
return ret.String()
150+
}
151+
152+
func strideSummary[T any](w io.Writer, st *strideTable[T], indent int) {
153+
fmt.Fprintf(w, "%d refs\n", st.refs)
154+
indent += 2
155+
for i := firstHostIndex; i <= lastHostIndex; i++ {
156+
if child := st.entries[i].child; child != nil {
157+
addr, len := inversePrefixIndex(i)
158+
fmt.Fprintf(w, "%s%d/%d: ", strings.Repeat(" ", indent), addr, len)
159+
strideSummary(w, child, indent)
160+
}
161+
}
162+
}

0 commit comments

Comments
 (0)