Skip to content

Commit c6dd940

Browse files
Florian Westphal authored and ummakynes committed
netfilter: allow early drop of assured conntracks
If insertion of a new conntrack fails because the table is full, the kernel searches the next buckets of the hash slot where the new connection was supposed to be inserted for an entry that hasn't seen traffic in the reply direction (non-assured). If it finds one, that entry is dropped and the new connection entry is allocated. Allow the conntrack gc worker to also remove *assured* conntracks if resources are low. Do this by querying the l4 tracker, e.g. tcp connections are now dropped if they are no longer established (e.g. in finwait). This could be refined further, e.g. by adding a 'soft' established timeout (i.e., a timeout that is only used once we get close to resource exhaustion). Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Florian Westphal <fw@strlen.de> Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent b3a5db1 commit c6dd940

File tree

5 files changed

+102
-0
lines changed

5 files changed

+102
-0
lines changed

include/net/netfilter/nf_conntrack_l4proto.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ struct nf_conntrack_l4proto {
5858
unsigned int dataoff,
5959
u_int8_t pf, unsigned int hooknum);
6060

61+
/* called by gc worker if table is full */
62+
bool (*can_early_drop)(const struct nf_conn *ct);
63+
6164
/* Print out the per-protocol part of the tuple. Return like seq_* */
6265
void (*print_tuple)(struct seq_file *s,
6366
const struct nf_conntrack_tuple *);

net/netfilter/nf_conntrack_core.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
7676
struct delayed_work dwork;
7777
u32 last_bucket;
7878
bool exiting;
79+
bool early_drop;
7980
long next_gc_run;
8081
};
8182

@@ -951,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
951952
return false;
952953
}
953954

955+
/* Tell the gc worker whether @ct must be left alone: an entry that is
 * not confirmed, or one that is already dying, is skipped.
 */
static bool gc_worker_skip_ct(const struct nf_conn *ct)
{
	if (!nf_ct_is_confirmed(ct))
		return true;

	return nf_ct_is_dying(ct);
}
959+
960+
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
961+
{
962+
const struct nf_conntrack_l4proto *l4proto;
963+
964+
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
965+
return true;
966+
967+
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
968+
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
969+
return true;
970+
971+
return false;
972+
}
973+
954974
static void gc_worker(struct work_struct *work)
955975
{
956976
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
957977
unsigned int i, goal, buckets = 0, expired_count = 0;
978+
unsigned int nf_conntrack_max95 = 0;
958979
struct conntrack_gc_work *gc_work;
959980
unsigned int ratio, scanned = 0;
960981
unsigned long next_run;
@@ -963,6 +984,8 @@ static void gc_worker(struct work_struct *work)
963984

964985
goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
965986
i = gc_work->last_bucket;
987+
if (gc_work->early_drop)
988+
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
966989

967990
do {
968991
struct nf_conntrack_tuple_hash *h;
@@ -979,6 +1002,8 @@ static void gc_worker(struct work_struct *work)
9791002
i = 0;
9801003

9811004
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
1005+
struct net *net;
1006+
9821007
tmp = nf_ct_tuplehash_to_ctrack(h);
9831008

9841009
scanned++;
@@ -987,6 +1012,27 @@ static void gc_worker(struct work_struct *work)
9871012
expired_count++;
9881013
continue;
9891014
}
1015+
1016+
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
1017+
continue;
1018+
1019+
net = nf_ct_net(tmp);
1020+
if (atomic_read(&net->ct.count) < nf_conntrack_max95)
1021+
continue;
1022+
1023+
/* need to take reference to avoid possible races */
1024+
if (!atomic_inc_not_zero(&tmp->ct_general.use))
1025+
continue;
1026+
1027+
if (gc_worker_skip_ct(tmp)) {
1028+
nf_ct_put(tmp);
1029+
continue;
1030+
}
1031+
1032+
if (gc_worker_can_early_drop(tmp))
1033+
nf_ct_kill(tmp);
1034+
1035+
nf_ct_put(tmp);
9901036
}
9911037

9921038
/* could check get_nulls_value() here and restart if ct
@@ -1032,6 +1078,7 @@ static void gc_worker(struct work_struct *work)
10321078

10331079
next_run = gc_work->next_gc_run;
10341080
gc_work->last_bucket = i;
1081+
gc_work->early_drop = false;
10351082
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
10361083
}
10371084

@@ -1057,6 +1104,8 @@ __nf_conntrack_alloc(struct net *net,
10571104
if (nf_conntrack_max &&
10581105
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
10591106
if (!early_drop(net, hash)) {
1107+
if (!conntrack_gc_work.early_drop)
1108+
conntrack_gc_work.early_drop = true;
10601109
atomic_dec(&net->ct.count);
10611110
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
10621111
return ERR_PTR(-ENOMEM);

net/netfilter/nf_conntrack_proto_dccp.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,20 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
609609
return -NF_ACCEPT;
610610
}
611611

612+
static bool dccp_can_early_drop(const struct nf_conn *ct)
613+
{
614+
switch (ct->proto.dccp.state) {
615+
case CT_DCCP_CLOSEREQ:
616+
case CT_DCCP_CLOSING:
617+
case CT_DCCP_TIMEWAIT:
618+
return true;
619+
default:
620+
break;
621+
}
622+
623+
return false;
624+
}
625+
612626
static void dccp_print_tuple(struct seq_file *s,
613627
const struct nf_conntrack_tuple *tuple)
614628
{
@@ -868,6 +882,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
868882
.packet = dccp_packet,
869883
.get_timeouts = dccp_get_timeouts,
870884
.error = dccp_error,
885+
.can_early_drop = dccp_can_early_drop,
871886
.print_tuple = dccp_print_tuple,
872887
.print_conntrack = dccp_print_conntrack,
873888
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -902,6 +917,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
902917
.packet = dccp_packet,
903918
.get_timeouts = dccp_get_timeouts,
904919
.error = dccp_error,
920+
.can_early_drop = dccp_can_early_drop,
905921
.print_tuple = dccp_print_tuple,
906922
.print_conntrack = dccp_print_conntrack,
907923
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

net/netfilter/nf_conntrack_proto_sctp.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,20 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
535535
return -NF_ACCEPT;
536536
}
537537

538+
static bool sctp_can_early_drop(const struct nf_conn *ct)
539+
{
540+
switch (ct->proto.sctp.state) {
541+
case SCTP_CONNTRACK_SHUTDOWN_SENT:
542+
case SCTP_CONNTRACK_SHUTDOWN_RECD:
543+
case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
544+
return true;
545+
default:
546+
break;
547+
}
548+
549+
return false;
550+
}
551+
538552
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
539553

540554
#include <linux/netfilter/nfnetlink.h>
@@ -783,6 +797,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
783797
.get_timeouts = sctp_get_timeouts,
784798
.new = sctp_new,
785799
.error = sctp_error,
800+
.can_early_drop = sctp_can_early_drop,
786801
.me = THIS_MODULE,
787802
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
788803
.to_nlattr = sctp_to_nlattr,
@@ -818,6 +833,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
818833
.get_timeouts = sctp_get_timeouts,
819834
.new = sctp_new,
820835
.error = sctp_error,
836+
.can_early_drop = sctp_can_early_drop,
821837
.me = THIS_MODULE,
822838
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
823839
.to_nlattr = sctp_to_nlattr,

net/netfilter/nf_conntrack_proto_tcp.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,22 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
11721172
return true;
11731173
}
11741174

1175+
static bool tcp_can_early_drop(const struct nf_conn *ct)
1176+
{
1177+
switch (ct->proto.tcp.state) {
1178+
case TCP_CONNTRACK_FIN_WAIT:
1179+
case TCP_CONNTRACK_LAST_ACK:
1180+
case TCP_CONNTRACK_TIME_WAIT:
1181+
case TCP_CONNTRACK_CLOSE:
1182+
case TCP_CONNTRACK_CLOSE_WAIT:
1183+
return true;
1184+
default:
1185+
break;
1186+
}
1187+
1188+
return false;
1189+
}
1190+
11751191
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
11761192

11771193
#include <linux/netfilter/nfnetlink.h>
@@ -1549,6 +1565,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
15491565
.get_timeouts = tcp_get_timeouts,
15501566
.new = tcp_new,
15511567
.error = tcp_error,
1568+
.can_early_drop = tcp_can_early_drop,
15521569
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
15531570
.to_nlattr = tcp_to_nlattr,
15541571
.nlattr_size = tcp_nlattr_size,
@@ -1586,6 +1603,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
15861603
.get_timeouts = tcp_get_timeouts,
15871604
.new = tcp_new,
15881605
.error = tcp_error,
1606+
.can_early_drop = tcp_can_early_drop,
15891607
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
15901608
.to_nlattr = tcp_to_nlattr,
15911609
.nlattr_size = tcp_nlattr_size,

0 commit comments

Comments
 (0)