Skip to content

Commit b16c291

Browse files
sashalevin authored and ummakynes committed
netfilter: nf_conntrack: use safer way to lock all buckets
When we need to lock all buckets in the connection hashtable we'd attempt to lock 1024 spinlocks, which is way more preemption levels than supported by the kernel. Furthermore, this behavior was hidden by checking if lockdep is enabled, and if it was - use only 8 buckets(!). Fix this by using a global lock and synchronize all buckets on it when we need to lock them all. This is pretty heavyweight, but is only done when we need to resize the hashtable, and that doesn't happen often enough (or at all). Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Reviewed-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent 35b8153 commit b16c291

File tree

5 files changed

+35
-19
lines changed

5 files changed

+35
-19
lines changed

include/net/netfilter/nf_conntrack_core.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
7979
const struct nf_conntrack_l3proto *l3proto,
8080
const struct nf_conntrack_l4proto *proto);
8181

82-
#ifdef CONFIG_LOCKDEP
83-
# define CONNTRACK_LOCKS 8
84-
#else
85-
# define CONNTRACK_LOCKS 1024
86-
#endif
82+
#define CONNTRACK_LOCKS 1024
83+
8784
extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
85+
void nf_conntrack_lock(spinlock_t *lock);
8886

8987
extern spinlock_t nf_conntrack_expect_lock;
9088

net/netfilter/nf_conntrack_core.c

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
6666
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
6767
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
6868

69+
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
70+
static __read_mostly bool nf_conntrack_locks_all;
71+
72+
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
73+
{
74+
spin_lock(lock);
75+
while (unlikely(nf_conntrack_locks_all)) {
76+
spin_unlock(lock);
77+
spin_lock(&nf_conntrack_locks_all_lock);
78+
spin_unlock(&nf_conntrack_locks_all_lock);
79+
spin_lock(lock);
80+
}
81+
}
82+
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
83+
6984
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
7085
{
7186
h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
8297
h1 %= CONNTRACK_LOCKS;
8398
h2 %= CONNTRACK_LOCKS;
8499
if (h1 <= h2) {
85-
spin_lock(&nf_conntrack_locks[h1]);
100+
nf_conntrack_lock(&nf_conntrack_locks[h1]);
86101
if (h1 != h2)
87102
spin_lock_nested(&nf_conntrack_locks[h2],
88103
SINGLE_DEPTH_NESTING);
89104
} else {
90-
spin_lock(&nf_conntrack_locks[h2]);
105+
nf_conntrack_lock(&nf_conntrack_locks[h2]);
91106
spin_lock_nested(&nf_conntrack_locks[h1],
92107
SINGLE_DEPTH_NESTING);
93108
}
@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
102117
{
103118
int i;
104119

105-
for (i = 0; i < CONNTRACK_LOCKS; i++)
106-
spin_lock_nested(&nf_conntrack_locks[i], i);
120+
spin_lock(&nf_conntrack_locks_all_lock);
121+
nf_conntrack_locks_all = true;
122+
123+
for (i = 0; i < CONNTRACK_LOCKS; i++) {
124+
spin_lock(&nf_conntrack_locks[i]);
125+
spin_unlock(&nf_conntrack_locks[i]);
126+
}
107127
}
108128

109129
static void nf_conntrack_all_unlock(void)
110130
{
111-
int i;
112-
113-
for (i = 0; i < CONNTRACK_LOCKS; i++)
114-
spin_unlock(&nf_conntrack_locks[i]);
131+
nf_conntrack_locks_all = false;
132+
spin_unlock(&nf_conntrack_locks_all_lock);
115133
}
116134

117135
unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
757775
hash = hash_bucket(_hash, net);
758776
for (; i < net->ct.htable_size; i++) {
759777
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
760-
spin_lock(lockp);
778+
nf_conntrack_lock(lockp);
761779
if (read_seqcount_retry(&net->ct.generation, sequence)) {
762780
spin_unlock(lockp);
763781
goto restart;
@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
13821400
for (; *bucket < net->ct.htable_size; (*bucket)++) {
13831401
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
13841402
local_bh_disable();
1385-
spin_lock(lockp);
1403+
nf_conntrack_lock(lockp);
13861404
if (*bucket < net->ct.htable_size) {
13871405
hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
13881406
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)

net/netfilter/nf_conntrack_helper.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
425425
}
426426
local_bh_disable();
427427
for (i = 0; i < net->ct.htable_size; i++) {
428-
spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
428+
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
429429
if (i < net->ct.htable_size) {
430430
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
431431
unhelp(h, me);

net/netfilter/nf_conntrack_netlink.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
840840
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
841841
restart:
842842
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
843-
spin_lock(lockp);
843+
nf_conntrack_lock(lockp);
844844
if (cb->args[0] >= net->ct.htable_size) {
845845
spin_unlock(lockp);
846846
goto out;

net/netfilter/nfnetlink_cttimeout.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,12 +307,12 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
307307

308308
local_bh_disable();
309309
for (i = 0; i < net->ct.htable_size; i++) {
310-
spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
310+
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
311311
if (i < net->ct.htable_size) {
312312
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
313313
untimeout(h, timeout);
314314
}
315-
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
315+
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
316316
}
317317
local_bh_enable();
318318
}

0 commit comments

Comments (0)