
Commit 56d52d4

Florian Westphal authored and ummakynes committed
netfilter: conntrack: use a single hashtable for all namespaces
We already include the netns address in the hash and compare the netns
pointers during lookup, so even if namespaces have overlapping addresses,
entries will be spread across the table.

Assuming a 64k bucket size, this change saves 0.5 MB per namespace on a
64-bit system (65,536 buckets × 8 bytes per hlist_nulls_head = 512 KiB).

The NAT bysrc and expectation hashes are still per namespace; those will
be changed soon as well. A future patch will also make the conntrack
object slab cache global again.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent 1b8c8a9 commit 56d52d4
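
The mechanism the commit message describes can be sketched in ordinary
userspace C: mix the namespace pointer into the tuple hash, scale the result
onto one shared bucket array, and compare the namespace again on lookup. The
sketch below is illustrative only, not kernel code; tuple, entry, hash_raw(),
find() and HTABLE_SIZE are hypothetical stand-ins for the kernel's
nf_conntrack_tuple, hash_conntrack_raw(), scale_hash(), nf_ct_key_equal()
and nf_conntrack_htable_size.

/* Minimal userspace sketch of the idea behind this commit, not kernel
 * code: the owning namespace's address is mixed into the hash and
 * compared again on lookup, so entries from every namespace can share
 * a single global bucket array.  All names here are hypothetical. */
#include <stdint.h>
#include <stdio.h>

#define HTABLE_SIZE 65536u	/* stand-in for nf_conntrack_htable_size */

struct tuple { uint32_t src, dst; uint16_t sport, dport; };

struct entry {
	const void *net;	/* owning namespace, checked on lookup */
	struct tuple tuple;
	struct entry *next;
};

static struct entry *global_hash[HTABLE_SIZE];	/* one table for all netns */

/* Mix the netns pointer into the hash (cf. hash_conntrack_raw()) so
 * identical tuples from different namespaces spread across the table. */
static uint32_t hash_raw(const struct tuple *t, const void *net)
{
	uint32_t h = t->src ^ t->dst ^ (((uint32_t)t->sport << 16) | t->dport);

	h ^= (uint32_t)(uintptr_t)net;
	return h * 2654435761u;		/* multiplicative mixing step */
}

/* cf. scale_hash(): reciprocal_scale() maps a 32-bit hash to a bucket. */
static uint32_t scale_hash(uint32_t h)
{
	return (uint32_t)(((uint64_t)h * HTABLE_SIZE) >> 32);
}

/* Lookup compares the namespace too, mirroring nf_ct_key_equal(). */
static struct entry *find(const struct tuple *t, const void *net)
{
	struct entry *e = global_hash[scale_hash(hash_raw(t, net))];

	for (; e; e = e->next)
		if (e->net == net &&
		    e->tuple.src == t->src && e->tuple.dst == t->dst &&
		    e->tuple.sport == t->sport && e->tuple.dport == t->dport)
			return e;
	return NULL;
}

int main(void)
{
	/* Two namespaces (modeled as distinct addresses) track the same
	 * tuple; both coexist in the one global table. */
	int ns1, ns2;
	struct tuple t = { 0x0a000001, 0x0a000002, 1234, 80 };
	struct entry a = { &ns1, t, NULL };
	struct entry b = { &ns2, t, NULL };
	uint32_t ba = scale_hash(hash_raw(&t, &ns1));
	uint32_t bb = scale_hash(hash_raw(&t, &ns2));

	a.next = global_hash[ba]; global_hash[ba] = &a;
	b.next = global_hash[bb]; global_hash[bb] = &b;

	printf("ns1 entry: %p, ns2 entry: %p\n",
	       (void *)find(&t, &ns1), (void *)find(&t, &ns2));
	return 0;
}

With distinct net pointers the duplicate tuples usually land in different
buckets, and even on a bucket collision the e->net check keeps each lookup
namespace-correct. That is the property that lets the diff below drop the
per-netns net->ct.hash and net->ct.htable_size in favour of the global
nf_conntrack_hash and nf_conntrack_htable_size.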

10 files changed, 62 insertions(+), 68 deletions(-)


include/net/netfilter/nf_conntrack_core.h

Lines changed: 1 addition & 0 deletions
@@ -81,6 +81,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 
 #define CONNTRACK_LOCKS 1024
 
+extern struct hlist_nulls_head *nf_conntrack_hash;
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 void nf_conntrack_lock(spinlock_t *lock);

include/net/netns/conntrack.h

Lines changed: 0 additions & 2 deletions
@@ -93,9 +93,7 @@ struct netns_ct {
 	int			sysctl_tstamp;
 	int			sysctl_checksum;
 
-	unsigned int		htable_size;
 	struct kmem_cache	*nf_conntrack_cachep;
-	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct ct_pcpu __percpu *pcpu_lists;
 	struct ip_conntrack_stat __percpu *stat;

net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c

Lines changed: 1 addition & 1 deletion
@@ -360,7 +360,7 @@ static int ipv4_init_net(struct net *net)
 
 	in->ctl_table[0].data = &nf_conntrack_max;
 	in->ctl_table[1].data = &net->ct.count;
-	in->ctl_table[2].data = &net->ct.htable_size;
+	in->ctl_table[2].data = &nf_conntrack_htable_size;
 	in->ctl_table[3].data = &net->ct.sysctl_checksum;
 	in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
 #endif

net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c

Lines changed: 4 additions & 6 deletions
@@ -31,15 +31,14 @@ struct ct_iter_state {
 
 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < net->ct.htable_size;
+	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(
-			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+			hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -49,17 +48,16 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 					    struct hlist_nulls_node *head)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= net->ct.htable_size)
+			if (++st->bucket >= nf_conntrack_htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(
-			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+			hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
 	}
 	return head;
 }

net/netfilter/nf_conntrack_core.c

Lines changed: 40 additions & 40 deletions
@@ -69,6 +69,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+
 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
 static __read_mostly seqcount_t nf_conntrack_generation;
 static __read_mostly bool nf_conntrack_locks_all;
@@ -164,9 +167,9 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
 		      tuple->dst.protonum));
 }
 
-static u32 hash_bucket(u32 hash, const struct net *net)
+static u32 scale_hash(u32 hash)
 {
-	return reciprocal_scale(hash, net->ct.htable_size);
+	return reciprocal_scale(hash, nf_conntrack_htable_size);
 }
 
 static u32 __hash_conntrack(const struct net *net,
@@ -179,7 +182,7 @@ static u32 __hash_conntrack(const struct net *net,
 static u32 hash_conntrack(const struct net *net,
 			  const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(net, tuple, net->ct.htable_size);
+	return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
 bool
@@ -478,8 +481,8 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
 begin:
 	do {
 		sequence = read_seqcount_begin(&nf_conntrack_generation);
-		bucket = hash_bucket(hash, net);
-		ct_hash = net->ct.hash;
+		bucket = scale_hash(hash);
+		ct_hash = nf_conntrack_hash;
 	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
 
 	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
@@ -543,12 +546,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int hash,
 				       unsigned int reply_hash)
 {
-	struct net *net = nf_ct_net(ct);
-
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-			   &net->ct.hash[hash]);
+			   &nf_conntrack_hash[hash]);
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
-			   &net->ct.hash[reply_hash]);
+			   &nf_conntrack_hash[reply_hash]);
 }
 
 int
@@ -573,12 +574,12 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
 	/* See if there's one in the list already, including reverse */
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				    zone, net))
 			goto out;
 
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				    zone, net))
 			goto out;
@@ -633,7 +634,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		/* reuse the hash saved before */
 		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
-		hash = hash_bucket(hash, net);
+		hash = scale_hash(hash);
 		reply_hash = hash_conntrack(net,
 					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
@@ -663,12 +664,12 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				    zone, net))
 			goto out;
 
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				    zone, net))
 			goto out;
@@ -736,7 +737,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	do {
 		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		hash = hash_conntrack(net, tuple);
-		ct_hash = net->ct.hash;
+		ct_hash = nf_conntrack_hash;
 	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
 
 	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
@@ -773,16 +774,16 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 	local_bh_disable();
restart:
 	sequence = read_seqcount_begin(&nf_conntrack_generation);
-	hash = hash_bucket(_hash, net);
-	for (; i < net->ct.htable_size; i++) {
+	hash = scale_hash(_hash);
+	for (; i < nf_conntrack_htable_size; i++) {
 		lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
 		nf_conntrack_lock(lockp);
 		if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
 			spin_unlock(lockp);
 			goto restart;
 		}
-		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
-					       hnnode) {
+		hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
+					       hnnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
 			if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
 			    !nf_ct_is_dying(tmp) &&
@@ -793,7 +794,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 			cnt++;
 		}
 
-		hash = (hash + 1) % net->ct.htable_size;
+		hash = (hash + 1) % nf_conntrack_htable_size;
 		spin_unlock(lockp);
 
 		if (ct || cnt >= NF_CT_EVICTION_RANGE)
@@ -1376,12 +1377,12 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	int cpu;
 	spinlock_t *lockp;
 
-	for (; *bucket < net->ct.htable_size; (*bucket)++) {
+	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
 		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
 		local_bh_disable();
 		nf_conntrack_lock(lockp);
-		if (*bucket < net->ct.htable_size) {
-			hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+		if (*bucket < nf_conntrack_htable_size) {
+			hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
 				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 					continue;
 				ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1478,6 +1479,8 @@ void nf_conntrack_cleanup_end(void)
 	while (untrack_refs() > 0)
 		schedule();
 
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
@@ -1528,7 +1531,6 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 	}
 
 	list_for_each_entry(net, net_exit_list, exit_list) {
-		nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 		nf_conntrack_proto_pernet_fini(net);
 		nf_conntrack_helper_pernet_fini(net);
 		nf_conntrack_ecache_pernet_fini(net);
@@ -1599,22 +1601,22 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	 * though since that required taking the locks.
 	 */
 
-	for (i = 0; i < init_net.ct.htable_size; i++) {
-		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
-			h = hlist_nulls_entry(init_net.ct.hash[i].first,
-					struct nf_conntrack_tuple_hash, hnnode);
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
+		while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+			h = hlist_nulls_entry(nf_conntrack_hash[i].first,
+					      struct nf_conntrack_tuple_hash, hnnode);
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
 			bucket = __hash_conntrack(nf_ct_net(ct),
 						  &h->tuple, hashsize);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
-	old_size = init_net.ct.htable_size;
-	old_hash = init_net.ct.hash;
+	old_size = nf_conntrack_htable_size;
+	old_hash = nf_conntrack_hash;
 
-	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-	init_net.ct.hash = hash;
+	nf_conntrack_hash = hash;
+	nf_conntrack_htable_size = hashsize;
 
 	write_seqcount_end(&nf_conntrack_generation);
 	nf_conntrack_all_unlock();
@@ -1670,6 +1672,11 @@ int nf_conntrack_init_start(void)
 		 * entries. */
 		max_factor = 4;
 	}
+
+	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+	if (!nf_conntrack_hash)
+		return -ENOMEM;
+
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
 	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
@@ -1748,6 +1755,7 @@ int nf_conntrack_init_start(void)
 err_acct:
 	nf_conntrack_expect_fini();
 err_expect:
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
 	return ret;
 }
 
@@ -1800,12 +1808,6 @@ int nf_conntrack_init_net(struct net *net)
 		goto err_cache;
 	}
 
-	net->ct.htable_size = nf_conntrack_htable_size;
-	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
-	if (!net->ct.hash) {
-		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
-		goto err_hash;
-	}
 	ret = nf_conntrack_expect_pernet_init(net);
 	if (ret < 0)
 		goto err_expect;
@@ -1837,8 +1839,6 @@ int nf_conntrack_init_net(struct net *net)
 err_acct:
 	nf_conntrack_expect_pernet_fini(net);
 err_expect:
-	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-err_hash:
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
 	kfree(net->ct.slabname);

net/netfilter/nf_conntrack_helper.c

Lines changed: 3 additions & 3 deletions
@@ -424,10 +424,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 		spin_unlock_bh(&pcpu->lock);
 	}
 	local_bh_disable();
-	for (i = 0; i < net->ct.htable_size; i++) {
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
 		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
-		if (i < net->ct.htable_size) {
-			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+		if (i < nf_conntrack_htable_size) {
+			hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
 				unhelp(h, me);
 		}
 		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);

net/netfilter/nf_conntrack_netlink.c

Lines changed: 4 additions & 4 deletions
@@ -824,16 +824,16 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conn *)cb->args[1];
 
 	local_bh_disable();
-	for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
+	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
 		lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
 		nf_conntrack_lock(lockp);
-		if (cb->args[0] >= net->ct.htable_size) {
+		if (cb->args[0] >= nf_conntrack_htable_size) {
 			spin_unlock(lockp);
 			goto out;
 		}
-		hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
-					   hnnode) {
+		hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]],
+					   hnnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = nf_ct_tuplehash_to_ctrack(h);

net/netfilter/nf_conntrack_standalone.c

Lines changed: 5 additions & 8 deletions
@@ -54,14 +54,13 @@ struct ct_iter_state {
 
 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < net->ct.htable_size;
+	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+		n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -71,18 +70,17 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 					    struct hlist_nulls_node *head)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= net->ct.htable_size)
+			if (++st->bucket >= nf_conntrack_htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(
 			hlist_nulls_first_rcu(
-				&net->ct.hash[st->bucket]));
+				&nf_conntrack_hash[st->bucket]));
 	}
 	return head;
 }
@@ -458,7 +456,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	},
 	{
 		.procname	= "nf_conntrack_buckets",
-		.data		= &init_net.ct.htable_size,
+		.data		= &nf_conntrack_htable_size,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
@@ -512,7 +510,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		goto out_kmemdup;
 
 	table[1].data = &net->ct.count;
-	table[2].data = &net->ct.htable_size;
 	table[3].data = &net->ct.sysctl_checksum;
 	table[4].data = &net->ct.sysctl_log_invalid;
