
Commit 4b91816

Merge branch 'vrf-ipv6'
David Ahern says:

====================
net: VRF support in IPv6 stack

Initial support for VRF in IPv6 stack. Makes IPv6 functionality on par
with IPv4 -- ping, tcp client/server and udp client/server all work fine.
tcpdump on vrf device and external tap (e.g., host side tap device) shows
all packets with proper addresses. IPv6 does not need the source address
operation like IPv4.

Verified vti6 works properly in my setup as does use of an IPv6 address
on the VRF device.

v3
- re-based to top of net-next (updates per net namespace changes by Eric)
- fixed dst_entry typecasts as requested by Dave
- added flags to inet6_rtm_getroute (IPv6 version of deaa0a6)

v2
- fixed CONFIG_IPV6 dependency as questioned by Cong
  - if IPV6 is a module, kbuild ensures VRF is a module
  - if IPV6 is disabled, IPv6 functionality is compiled out of the VRF module
- addressed comments from Nik over IRC
  - removed duplicate call to netif_is_l3_master in l3mdev_rt6_dst_by_oif
  - changed allocation flag from GFP_ATOMIC to GFP_KERNEL since it is init time
  - added free of rt6i_pcpu
  - check_ipv6_frame returns false only if packet is NDISC type
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents af37939 + ca25449
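The CONFIG_IPV6 handling described under v2 shows up in two places in the diff below: the Kconfig line "depends on IPV6 || IPV6=n" (if IPV6=m, kbuild forces VRF to be a module as well), and IS_ENABLED(CONFIG_IPV6) stubs in vrf.c so the IPv6 paths compile away when IPV6 is disabled. A minimal sketch of that stub idiom, with an illustrative function name that is not part of the patch:

#include <linux/kconfig.h>	/* IS_ENABLED() */

#if IS_ENABLED(CONFIG_IPV6)
/* built when IPV6=y, or when IPV6=m and this code is modular as well */
static int example_handle_v6(void)
{
	return 1;	/* real IPv6 handling would go here */
}
#else
/* IPV6=n: callers still compile and link, but the IPv6 path is a no-op */
static int example_handle_v6(void)
{
	return 0;
}
#endif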

9 files changed, +386 -16 lines changed


drivers/net/Kconfig

Lines changed: 3 additions & 1 deletion
@@ -298,8 +298,10 @@ config NLMON
 
 config NET_VRF
 	tristate "Virtual Routing and Forwarding (Lite)"
-	depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES
+	depends on IP_MULTIPLE_TABLES
 	depends on NET_L3_MASTER_DEV
+	depends on IPV6 || IPV6=n
+	depends on IPV6_MULTIPLE_TABLES || IPV6=n
 	---help---
 	  This option enables the support for mapping interfaces into VRF's. The
 	  support enables VRF devices.

drivers/net/vrf.c

Lines changed: 272 additions & 1 deletion
@@ -30,6 +30,7 @@
 #include <net/arp.h>
 #include <net/ip.h>
 #include <net/ip_fib.h>
+#include <net/ip6_fib.h>
 #include <net/ip6_route.h>
 #include <net/rtnetlink.h>
 #include <net/route.h>
@@ -57,6 +58,7 @@ struct slave_queue {
 struct net_vrf {
 	struct slave_queue	queue;
 	struct rtable		*rth;
+	struct rt6_info		*rt6;
 	u32			tb_id;
 };
 
@@ -104,12 +106,56 @@ static struct dst_ops vrf_dst_ops = {
 	.default_advmss = vrf_default_advmss,
 };
 
+/* neighbor handling is done with actual device; do not want
+ * to flip skb->dev for those ndisc packets. This really fails
+ * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
+ * a start.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static bool check_ipv6_frame(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
+	size_t hlen = sizeof(*ipv6h);
+	bool rc = true;
+
+	if (skb->len < hlen)
+		goto out;
+
+	if (ipv6h->nexthdr == NEXTHDR_ICMP) {
+		const struct icmp6hdr *icmph;
+
+		if (skb->len < hlen + sizeof(*icmph))
+			goto out;
+
+		icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
+		switch (icmph->icmp6_type) {
+		case NDISC_ROUTER_SOLICITATION:
+		case NDISC_ROUTER_ADVERTISEMENT:
+		case NDISC_NEIGHBOUR_SOLICITATION:
+		case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		case NDISC_REDIRECT:
+			rc = false;
+			break;
+		}
+	}
+
+out:
+	return rc;
+}
+#else
+static bool check_ipv6_frame(const struct sk_buff *skb)
+{
+	return false;
+}
+#endif
+
 static bool is_ip_rx_frame(struct sk_buff *skb)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-	case htons(ETH_P_IPV6):
 		return true;
+	case htons(ETH_P_IPV6):
+		return check_ipv6_frame(skb);
 	}
 	return false;
 }
@@ -169,12 +215,53 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
 	return stats;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
+					   struct net_device *dev)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct net *net = dev_net(skb->dev);
+	struct flowi6 fl6 = {
+		/* needed to match OIF rule */
+		.flowi6_oif = dev->ifindex,
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.daddr = iph->daddr,
+		.saddr = iph->saddr,
+		.flowlabel = ip6_flowinfo(iph),
+		.flowi6_mark = skb->mark,
+		.flowi6_proto = iph->nexthdr,
+		.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
+	};
+	int ret = NET_XMIT_DROP;
+	struct dst_entry *dst;
+	struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (dst == dst_null)
+		goto err;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	ret = ip6_local_out(net, skb->sk, skb);
+	if (unlikely(net_xmit_eval(ret)))
+		dev->stats.tx_errors++;
+	else
+		ret = NET_XMIT_SUCCESS;
+
+	return ret;
+err:
+	vrf_tx_error(dev, skb);
+	return NET_XMIT_DROP;
+}
+#else
 static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 					   struct net_device *dev)
 {
 	vrf_tx_error(dev, skb);
 	return NET_XMIT_DROP;
 }
+#endif
 
 static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4,
 			    struct net_device *vrf_dev)
@@ -269,6 +356,157 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie)
+{
+	return dst;
+}
+
+static struct dst_ops vrf_dst_ops6 = {
+	.family = AF_INET6,
+	.local_out = ip6_local_out,
+	.check = vrf_ip6_check,
+	.mtu = vrf_v4_mtu,
+	.destroy = vrf_dst_destroy,
+	.default_advmss = vrf_default_advmss,
+};
+
+static int init_dst_ops6_kmem_cachep(void)
+{
+	vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache",
+						     sizeof(struct rt6_info),
+						     0,
+						     SLAB_HWCACHE_ALIGN,
+						     NULL);
+
+	if (!vrf_dst_ops6.kmem_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void free_dst_ops6_kmem_cachep(void)
+{
+	kmem_cache_destroy(vrf_dst_ops6.kmem_cachep);
+}
+
+static int vrf_input6(struct sk_buff *skb)
+{
+	skb->dev->stats.rx_errors++;
+	kfree_skb(skb);
+	return 0;
+}
+
+/* modelled after ip6_finish_output2 */
+static int vrf_finish_output6(struct net *net, struct sock *sk,
+			      struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct net_device *dev = dst->dev;
+	struct neighbour *neigh;
+	struct in6_addr *nexthop;
+	int ret;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	rcu_read_lock_bh();
+	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+	if (unlikely(!neigh))
+		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+	if (!IS_ERR(neigh)) {
+		ret = dst_neigh_output(dst, neigh, skb);
+		rcu_read_unlock_bh();
+		return ret;
+	}
+	rcu_read_unlock_bh();
+
+	IP6_INC_STATS(dev_net(dst->dev),
+		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/* modelled after ip6_output */
+static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+			    net, sk, skb, NULL, skb_dst(skb)->dev,
+			    vrf_finish_output6,
+			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+}
+
+static void vrf_rt6_destroy(struct net_vrf *vrf)
+{
+	dst_destroy(&vrf->rt6->dst);
+	free_percpu(vrf->rt6->rt6i_pcpu);
+	vrf->rt6 = NULL;
+}
+
+static int vrf_rt6_create(struct net_device *dev)
+{
+	struct net_vrf *vrf = netdev_priv(dev);
+	struct dst_entry *dst;
+	struct rt6_info *rt6;
+	int cpu;
+	int rc = -ENOMEM;
+
+	rt6 = dst_alloc(&vrf_dst_ops6, dev, 0,
+			DST_OBSOLETE_NONE,
+			(DST_HOST | DST_NOPOLICY | DST_NOXFRM));
+	if (!rt6)
+		goto out;
+
+	dst = &rt6->dst;
+
+	rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL);
+	if (!rt6->rt6i_pcpu) {
+		dst_destroy(dst);
+		goto out;
+	}
+	for_each_possible_cpu(cpu) {
+		struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu);
+		*p = NULL;
+	}
+
+	memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst));
+
+	INIT_LIST_HEAD(&rt6->rt6i_siblings);
+	INIT_LIST_HEAD(&rt6->rt6i_uncached);
+
+	rt6->dst.input = vrf_input6;
+	rt6->dst.output = vrf_output6;
+
+	rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id);
+
+	atomic_set(&rt6->dst.__refcnt, 2);
+
+	vrf->rt6 = rt6;
+	rc = 0;
+out:
+	return rc;
+}
+#else
+static int init_dst_ops6_kmem_cachep(void)
+{
+	return 0;
+}
+
+static void free_dst_ops6_kmem_cachep(void)
+{
+}
+
+static void vrf_rt6_destroy(struct net_vrf *vrf)
+{
+}
+
+static int vrf_rt6_create(struct net_device *dev)
+{
+	return 0;
+}
+#endif
+
 /* modelled after ip_finish_output2 */
 static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -490,6 +728,7 @@ static void vrf_dev_uninit(struct net_device *dev)
 	struct slave *slave, *next;
 
 	vrf_rtable_destroy(vrf);
+	vrf_rt6_destroy(vrf);
 
 	list_for_each_entry_safe(slave, next, head, list)
 		vrf_del_slave(dev, slave->dev);
@@ -513,10 +752,15 @@ static int vrf_dev_init(struct net_device *dev)
 	if (!vrf->rth)
 		goto out_stats;
 
+	if (vrf_rt6_create(dev) != 0)
+		goto out_rth;
+
 	dev->flags = IFF_MASTER | IFF_NOARP;
 
 	return 0;
 
+out_rth:
+	vrf_rtable_destroy(vrf);
 out_stats:
 	free_percpu(dev->dstats);
 	dev->dstats = NULL;
@@ -586,10 +830,30 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
 	fl4->flowi4_scope = scope;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
+					 const struct flowi6 *fl6)
+{
+	struct rt6_info *rt = NULL;
+
+	if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
+		struct net_vrf *vrf = netdev_priv(dev);
+
+		rt = vrf->rt6;
+		atomic_inc(&rt->dst.__refcnt);
+	}
+
+	return (struct dst_entry *)rt;
+}
+#endif
+
 static const struct l3mdev_ops vrf_l3mdev_ops = {
 	.l3mdev_fib_table = vrf_fib_table,
 	.l3mdev_get_rtable = vrf_get_rtable,
 	.l3mdev_get_saddr = vrf_get_saddr,
+#if IS_ENABLED(CONFIG_IPV6)
+	.l3mdev_get_rt6_dst = vrf_get_rt6_dst,
+#endif
 };
 
 static void vrf_get_drvinfo(struct net_device *dev,
@@ -731,6 +995,10 @@ static int __init vrf_init_module(void)
 	if (!vrf_dst_ops.kmem_cachep)
 		return -ENOMEM;
 
+	rc = init_dst_ops6_kmem_cachep();
+	if (rc != 0)
+		goto error2;
+
 	register_netdevice_notifier(&vrf_notifier_block);
 
 	rc = rtnl_link_register(&vrf_link_ops);
@@ -741,6 +1009,8 @@ static int __init vrf_init_module(void)
 
 error:
 	unregister_netdevice_notifier(&vrf_notifier_block);
+	free_dst_ops6_kmem_cachep();
+error2:
 	kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
 	return rc;
 }
@@ -750,6 +1020,7 @@ static void __exit vrf_cleanup_module(void)
 	rtnl_link_unregister(&vrf_link_ops);
 	unregister_netdevice_notifier(&vrf_notifier_block);
 	kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
+	free_dst_ops6_kmem_cachep();
 }
 
 module_init(vrf_init_module);
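The driver only provides the l3mdev_get_rt6_dst operation; the consumer is the IPv6/l3mdev code touched elsewhere in this series (the l3mdev_rt6_dst_by_oif helper mentioned in the changelog). A rough sketch of how a caller can reach the VRF's cached rt6 through that op, assuming CONFIG_NET_L3_MASTER_DEV and IPv6 are enabled; the function name here is hypothetical and the in-tree helper may differ:

#include <linux/netdevice.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/l3mdev.h>

/* illustrative only: ask an L3 master (VRF) device for its IPv6 dst */
static struct dst_entry *example_l3mdev_rt6_dst(const struct net_device *dev,
						const struct flowi6 *fl6)
{
	if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rt6_dst)
		return dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6);

	return NULL;	/* not an l3mdev device; normal route lookup applies */
}

Note that vrf_get_rt6_dst above takes a reference on the cached dst (atomic_inc on dst.__refcnt), so a dst obtained this way must eventually be dropped with dst_release().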
