Skip to content

Commit 79134e6

Browse files
edumazet authored and davem330 committed
net: do not create fallback tunnels for non-default namespaces
Fallback tunnels (like tunl0, gre0, gretap0, erspan0, sit0, ip6tnl0, ip6gre0) are automatically created when the corresponding module is loaded.

These tunnels are also automatically created when a new network namespace is created, at a great cost.

In many cases, netns are used for isolation purposes, and these extra network devices are a waste of resources. We are using thousands of netns per host, and hit the netns creation/delete bottleneck a lot. (Many thanks to Kirill for recent work on this)

Add a new sysctl so that we can opt-out from this automatic creation.

Note that these tunnels are still created for the initial namespace, to be the least intrusive for typical setups.

Tested:
lpk43:~# cat add_del_unshare.sh
for i in `seq 1 40`
do
 (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) &
done
wait

lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m37.521s
user 0m0.886s
sys 7m7.084s

lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m4.761s
user 0m0.851s
sys 1m8.343s
lpk43:~#

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 2b3905d commit 79134e6

File tree

8 files changed

+54
-10
lines changed

8 files changed

+54
-10
lines changed

Documentation/sysctl/net.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,18 @@ optmem_max
270270
Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
271271
of struct cmsghdr structures with appended data.
272272

273+
fb_tunnels_only_for_init_net
274+
----------------------------
275+
276+
Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
277+
sit0, ip6tnl0, ip6gre0) are automatically created when a new
278+
network namespace is created, if the corresponding tunnel is present
279+
in the initial network namespace.
280+
If set to 1, these devices are not automatically created, and
281+
user space is responsible for creating them if needed.
282+
283+
Default : 0 (for compatibility reasons)
284+
273285
2. /proc/sys/net/unix - Parameters for Unix domain sockets
274286
-------------------------------------------------------
275287

include/linux/netdevice.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,13 @@ struct netdev_queue {
585585
#endif
586586
} ____cacheline_aligned_in_smp;
587587

588+
extern int sysctl_fb_tunnels_only_for_init_net;
589+
590+
static inline bool net_has_fallback_tunnels(const struct net *net)
591+
{
592+
return net == &init_net || !sysctl_fb_tunnels_only_for_init_net;
593+
}
594+
588595
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
589596
{
590597
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)

include/net/ip_tunnels.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,10 @@ struct tnl_ptk_info {
180180

181181
struct ip_tunnel_net {
182182
struct net_device *fb_tunnel_dev;
183+
struct rtnl_link_ops *rtnl_link_ops;
183184
struct hlist_head tunnels[IP_TNL_HASH_SIZE];
184185
struct ip_tunnel __rcu *collect_md_tun;
186+
int type;
185187
};
186188

187189
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,

net/core/sysctl_net_core.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
3232

3333
static int net_msg_warn; /* Unused, but still a sysctl */
3434

35+
int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
36+
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
37+
3538
#ifdef CONFIG_RPS
3639
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
3740
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
513516
.proc_handler = proc_dointvec_minmax,
514517
.extra1 = &zero,
515518
},
519+
{
520+
.procname = "fb_tunnels_only_for_init_net",
521+
.data = &sysctl_fb_tunnels_only_for_init_net,
522+
.maxlen = sizeof(int),
523+
.mode = 0644,
524+
.proc_handler = proc_dointvec_minmax,
525+
.extra1 = &zero,
526+
.extra2 = &one,
527+
},
516528
{ }
517529
};
518530

net/ipv4/ip_tunnel.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
347347
struct net_device *dev;
348348
int t_hlen;
349349

350-
BUG_ON(!itn->fb_tunnel_dev);
351-
dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
350+
dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
352351
if (IS_ERR(dev))
353352
return ERR_CAST(dev);
354353

@@ -822,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
822821
struct net *net = t->net;
823822
struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
824823

825-
BUG_ON(!itn->fb_tunnel_dev);
826824
switch (cmd) {
827825
case SIOCGETTUNNEL:
828826
if (dev == itn->fb_tunnel_dev) {
@@ -847,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
847845
p->o_key = 0;
848846
}
849847

850-
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
848+
t = ip_tunnel_find(itn, p, itn->type);
851849

852850
if (cmd == SIOCADDTUNNEL) {
853851
if (!t) {
@@ -991,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
991989
struct ip_tunnel_parm parms;
992990
unsigned int i;
993991

992+
itn->rtnl_link_ops = ops;
994993
for (i = 0; i < IP_TNL_HASH_SIZE; i++)
995994
INIT_HLIST_HEAD(&itn->tunnels[i]);
996995

997-
if (!ops) {
996+
if (!ops || !net_has_fallback_tunnels(net)) {
997+
struct ip_tunnel_net *it_init_net;
998+
999+
it_init_net = net_generic(&init_net, ip_tnl_net_id);
1000+
itn->type = it_init_net->type;
9981001
itn->fb_tunnel_dev = NULL;
9991002
return 0;
10001003
}
@@ -1012,17 +1015,18 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
10121015
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
10131016
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
10141017
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1018+
itn->type = itn->fb_tunnel_dev->type;
10151019
}
10161020
rtnl_unlock();
10171021

10181022
return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
10191023
}
10201024
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
10211025

1022-
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1026+
static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1027+
struct list_head *head,
10231028
struct rtnl_link_ops *ops)
10241029
{
1025-
struct net *net = dev_net(itn->fb_tunnel_dev);
10261030
struct net_device *dev, *aux;
10271031
int h;
10281032

@@ -1054,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
10541058
rtnl_lock();
10551059
list_for_each_entry(net, net_list, exit_list) {
10561060
itn = net_generic(net, id);
1057-
ip_tunnel_destroy(itn, &list, ops);
1061+
ip_tunnel_destroy(net, itn, &list, ops);
10581062
}
10591063
unregister_netdevice_many(&list);
10601064
rtnl_unlock();

net/ipv6/ip6_gre.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
236236
return t;
237237

238238
dev = ign->fb_tunnel_dev;
239-
if (dev->flags & IFF_UP)
239+
if (dev && dev->flags & IFF_UP)
240240
return netdev_priv(dev);
241241

242242
return NULL;
@@ -1472,6 +1472,8 @@ static int __net_init ip6gre_init_net(struct net *net)
14721472
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
14731473
int err;
14741474

1475+
if (!net_has_fallback_tunnels(net))
1476+
return 0;
14751477
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
14761478
NET_NAME_UNKNOWN,
14771479
ip6gre_tunnel_setup);

net/ipv6/ip6_tunnel.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2205,6 +2205,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
22052205
ip6n->tnls[0] = ip6n->tnls_wc;
22062206
ip6n->tnls[1] = ip6n->tnls_r_l;
22072207

2208+
if (!net_has_fallback_tunnels(net))
2209+
return 0;
22082210
err = -ENOMEM;
22092211
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
22102212
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);

net/ipv6/sit.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
182182
#ifdef CONFIG_IPV6_SIT_6RD
183183
struct ip_tunnel *t = netdev_priv(dev);
184184

185-
if (dev == sitn->fb_tunnel_dev) {
185+
if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
186186
ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
187187
t->ip6rd.relay_prefix = 0;
188188
t->ip6rd.prefixlen = 16;
@@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
18351835
sitn->tunnels[2] = sitn->tunnels_r;
18361836
sitn->tunnels[3] = sitn->tunnels_r_l;
18371837

1838+
if (!net_has_fallback_tunnels(net))
1839+
return 0;
1840+
18381841
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
18391842
NET_NAME_UNKNOWN,
18401843
ipip6_tunnel_setup);

0 commit comments

Comments
 (0)