Skip to content

Commit cc41c84

Browse files
Florian Westphal authored and ummakynes committed
netfilter: kill the fake untracked conntrack objects
Resurrect an old patch from Pablo Neira to remove the untracked objects.

Currently, there are four possible states of an skb wrt. conntrack:

1. No conntrack attached, ct is NULL.
2. Normal (kmem cache allocated) ct attached.
3. A template (kmalloc'd), not in any hash tables at any point in time.
4. The 'untracked' conntrack, a percpu nf_conn object, tagged via IPS_UNTRACKED_BIT in ct->status.

Untracked is supposed to be identical to case 1. It exists only so users can check

    -m conntrack --ctstate UNTRACKED vs.
    -m conntrack --ctstate INVALID

E.g. attempts to set connmark on INVALID or UNTRACKED conntracks are supposed to be a no-op.

Thus currently we need to check

    ct == NULL || nf_ct_is_untracked(ct)

in a lot of places in order to avoid altering untracked objects.

The other consequence of the percpu untracked object is that all -j NOTRACK (and, later, kfree_skb of such skbs) result in an atomic op (inc/dec of the untracked conntrack's refcount).

This adds a new kernel-private ctinfo state, IP_CT_UNTRACKED, to make the distinction instead.

The (few) places that care about packet invalid (ct is NULL) vs. packet untracked now need to test ct == NULL vs. ctinfo == IP_CT_UNTRACKED, but all other places can omit the nf_ct_is_untracked() check.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent 6e354a5 commit cc41c84

File tree

12 files changed

+39
-97
lines changed

12 files changed

+39
-97
lines changed

include/net/ip_vs.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1556,12 +1556,8 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
15561556
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
15571557

15581558
if (!ct || !nf_ct_is_untracked(ct)) {
1559-
struct nf_conn *untracked;
1560-
15611559
nf_conntrack_put(&ct->ct_general);
1562-
untracked = nf_ct_untracked_get();
1563-
nf_conntrack_get(&untracked->ct_general);
1564-
nf_ct_set(skb, untracked, IP_CT_NEW);
1560+
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
15651561
}
15661562
#endif
15671563
}

include/net/netfilter/nf_conntrack.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -243,14 +243,6 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
243243
enum ip_conntrack_dir dir,
244244
u32 seq);
245245

246-
/* Fake conntrack entry for untracked connections */
247-
DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
248-
static inline struct nf_conn *nf_ct_untracked_get(void)
249-
{
250-
return raw_cpu_ptr(&nf_conntrack_untracked);
251-
}
252-
void nf_ct_untracked_status_or(unsigned long bits);
253-
254246
/* Iterate over all conntracks: if iter returns true, it's deleted. */
255247
void nf_ct_iterate_cleanup(struct net *net,
256248
int (*iter)(struct nf_conn *i, void *data),
@@ -283,7 +275,7 @@ static inline int nf_ct_is_dying(const struct nf_conn *ct)
283275

284276
static inline int nf_ct_is_untracked(const struct nf_conn *ct)
285277
{
286-
return test_bit(IPS_UNTRACKED_BIT, &ct->status);
278+
return false;
287279
}
288280

289281
/* Packet is received from loopback */

include/uapi/linux/netfilter/nf_conntrack_common.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,14 @@ enum ip_conntrack_info {
2828
/* only for userspace compatibility */
2929
#ifndef __KERNEL__
3030
IP_CT_NEW_REPLY = IP_CT_NUMBER,
31+
#else
32+
IP_CT_UNTRACKED = 7,
3133
#endif
3234
};
3335

3436
#define NF_CT_STATE_INVALID_BIT (1 << 0)
3537
#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
36-
#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1))
38+
#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1))
3739

3840
/* Bitset representing status of connection. */
3941
enum ip_conntrack_status {
@@ -94,7 +96,7 @@ enum ip_conntrack_status {
9496
IPS_TEMPLATE_BIT = 11,
9597
IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT),
9698

97-
/* Conntrack is a fake untracked entry */
99+
/* Conntrack is a fake untracked entry. Obsolete and not used anymore */
98100
IPS_UNTRACKED_BIT = 12,
99101
IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
100102

net/ipv4/netfilter/nf_dup_ipv4.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
6969
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
7070
/* Avoid counting cloned packets towards the original connection. */
7171
nf_reset(skb);
72-
nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
73-
nf_conntrack_get(skb_nfct(skb));
72+
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
7473
#endif
7574
/*
7675
* If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential

net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
221221
type = icmp6h->icmp6_type - 130;
222222
if (type >= 0 && type < sizeof(noct_valid_new) &&
223223
noct_valid_new[type]) {
224-
nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
225-
nf_conntrack_get(skb_nfct(skb));
224+
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
226225
return NF_ACCEPT;
227226
}
228227

net/ipv6/netfilter/nf_dup_ipv6.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
5858

5959
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
6060
nf_reset(skb);
61-
nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
62-
nf_conntrack_get(skb_nfct(skb));
61+
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
6362
#endif
6463
if (hooknum == NF_INET_PRE_ROUTING ||
6564
hooknum == NF_INET_LOCAL_IN) {

net/netfilter/nf_conntrack_core.c

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
180180

181181
unsigned int nf_conntrack_max __read_mostly;
182182
seqcount_t nf_conntrack_generation __read_mostly;
183-
184-
/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used
185-
* for the nfctinfo. We cheat by (ab)using the PER CPU cache line
186-
* alignment to enforce this.
187-
*/
188-
DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
189-
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
190-
191183
static unsigned int nf_conntrack_hash_rnd __read_mostly;
192184

193185
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
@@ -1314,9 +1306,10 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
13141306
int ret;
13151307

13161308
tmpl = nf_ct_get(skb, &ctinfo);
1317-
if (tmpl) {
1309+
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
13181310
/* Previously seen (loopback or untracked)? Ignore. */
1319-
if (!nf_ct_is_template(tmpl)) {
1311+
if ((tmpl && !nf_ct_is_template(tmpl)) ||
1312+
ctinfo == IP_CT_UNTRACKED) {
13201313
NF_CT_STAT_INC_ATOMIC(net, ignore);
13211314
return NF_ACCEPT;
13221315
}
@@ -1629,18 +1622,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)
16291622
}
16301623
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
16311624

1632-
static int untrack_refs(void)
1633-
{
1634-
int cnt = 0, cpu;
1635-
1636-
for_each_possible_cpu(cpu) {
1637-
struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1638-
1639-
cnt += atomic_read(&ct->ct_general.use) - 1;
1640-
}
1641-
return cnt;
1642-
}
1643-
16441625
void nf_conntrack_cleanup_start(void)
16451626
{
16461627
conntrack_gc_work.exiting = true;
@@ -1650,8 +1631,6 @@ void nf_conntrack_cleanup_start(void)
16501631
void nf_conntrack_cleanup_end(void)
16511632
{
16521633
RCU_INIT_POINTER(nf_ct_destroy, NULL);
1653-
while (untrack_refs() > 0)
1654-
schedule();
16551634

16561635
cancel_delayed_work_sync(&conntrack_gc_work.dwork);
16571636
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
@@ -1825,20 +1804,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
18251804
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
18261805
&nf_conntrack_htable_size, 0600);
18271806

1828-
void nf_ct_untracked_status_or(unsigned long bits)
1829-
{
1830-
int cpu;
1831-
1832-
for_each_possible_cpu(cpu)
1833-
per_cpu(nf_conntrack_untracked, cpu).status |= bits;
1834-
}
1835-
EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
1836-
18371807
int nf_conntrack_init_start(void)
18381808
{
18391809
int max_factor = 8;
18401810
int ret = -ENOMEM;
1841-
int i, cpu;
1811+
int i;
18421812

18431813
seqcount_init(&nf_conntrack_generation);
18441814

@@ -1921,15 +1891,6 @@ int nf_conntrack_init_start(void)
19211891
if (ret < 0)
19221892
goto err_proto;
19231893

1924-
/* Set up fake conntrack: to never be deleted, not in any hashes */
1925-
for_each_possible_cpu(cpu) {
1926-
struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1927-
write_pnet(&ct->ct_net, &init_net);
1928-
atomic_set(&ct->ct_general.use, 1);
1929-
}
1930-
/* - and look it like as a confirmed connection */
1931-
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
1932-
19331894
conntrack_gc_work_init(&conntrack_gc_work);
19341895
queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
19351896

@@ -1977,6 +1938,7 @@ int nf_conntrack_init_net(struct net *net)
19771938
int ret = -ENOMEM;
19781939
int cpu;
19791940

1941+
BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
19801942
atomic_set(&net->ct.count, 0);
19811943

19821944
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);

net/netfilter/nf_nat_core.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -861,9 +861,6 @@ static int __init nf_nat_init(void)
861861

862862
nf_ct_helper_expectfn_register(&follow_master_nat);
863863

864-
/* Initialize fake conntrack so that NAT will skip it */
865-
nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
866-
867864
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
868865
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
869866
nfnetlink_parse_nat_setup);

net/netfilter/nft_ct.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,12 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
7272

7373
switch (priv->key) {
7474
case NFT_CT_STATE:
75-
if (ct == NULL)
76-
state = NF_CT_STATE_INVALID_BIT;
77-
else if (nf_ct_is_untracked(ct))
75+
if (ct)
76+
state = NF_CT_STATE_BIT(ctinfo);
77+
else if (ctinfo == IP_CT_UNTRACKED)
7878
state = NF_CT_STATE_UNTRACKED_BIT;
7979
else
80-
state = NF_CT_STATE_BIT(ctinfo);
80+
state = NF_CT_STATE_INVALID_BIT;
8181
*dest = state;
8282
return;
8383
default:
@@ -718,12 +718,10 @@ static void nft_notrack_eval(const struct nft_expr *expr,
718718

719719
ct = nf_ct_get(pkt->skb, &ctinfo);
720720
/* Previously seen (loopback or untracked)? Ignore. */
721-
if (ct)
721+
if (ct || ctinfo == IP_CT_UNTRACKED)
722722
return;
723723

724-
ct = nf_ct_untracked_get();
725-
atomic_inc(&ct->ct_general.use);
726-
nf_ct_set(skb, ct, IP_CT_NEW);
724+
nf_ct_set(skb, ct, IP_CT_UNTRACKED);
727725
}
728726

729727
static struct nft_expr_type nft_notrack_type;

net/netfilter/xt_CT.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
2626
if (skb->_nfct != 0)
2727
return XT_CONTINUE;
2828

29-
/* special case the untracked ct : we want the percpu object */
30-
if (!ct)
31-
ct = nf_ct_untracked_get();
32-
atomic_inc(&ct->ct_general.use);
33-
nf_ct_set(skb, ct, IP_CT_NEW);
29+
if (ct) {
30+
atomic_inc(&ct->ct_general.use);
31+
nf_ct_set(skb, ct, IP_CT_NEW);
32+
} else {
33+
nf_ct_set(skb, ct, IP_CT_UNTRACKED);
34+
}
3435

3536
return XT_CONTINUE;
3637
}
@@ -335,7 +336,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
335336
struct nf_conn *ct = info->ct;
336337
struct nf_conn_help *help;
337338

338-
if (ct && !nf_ct_is_untracked(ct)) {
339+
if (ct) {
339340
help = nfct_help(ct);
340341
if (help)
341342
module_put(help->helper->me);
@@ -412,8 +413,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
412413
if (skb->_nfct != 0)
413414
return XT_CONTINUE;
414415

415-
nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW);
416-
nf_conntrack_get(skb_nfct(skb));
416+
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
417417

418418
return XT_CONTINUE;
419419
}

net/netfilter/xt_conntrack.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,11 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
172172

173173
ct = nf_ct_get(skb, &ctinfo);
174174

175-
if (ct) {
176-
if (nf_ct_is_untracked(ct))
177-
statebit = XT_CONNTRACK_STATE_UNTRACKED;
178-
else
179-
statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
180-
} else
175+
if (ct)
176+
statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
177+
else if (ctinfo == IP_CT_UNTRACKED)
178+
statebit = XT_CONNTRACK_STATE_UNTRACKED;
179+
else
181180
statebit = XT_CONNTRACK_STATE_INVALID;
182181

183182
if (info->match_flags & XT_CONNTRACK_STATE) {

net/netfilter/xt_state.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,13 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
2828
unsigned int statebit;
2929
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
3030

31-
if (!ct)
31+
if (ct)
32+
statebit = XT_STATE_BIT(ctinfo);
33+
else if (ctinfo == IP_CT_UNTRACKED)
34+
statebit = XT_STATE_UNTRACKED;
35+
else
3236
statebit = XT_STATE_INVALID;
33-
else {
34-
if (nf_ct_is_untracked(ct))
35-
statebit = XT_STATE_UNTRACKED;
36-
else
37-
statebit = XT_STATE_BIT(ctinfo);
38-
}
37+
3938
return (sinfo->statemask & statebit);
4039
}
4140

0 commit comments

Comments
 (0)