30
30
#include <net/arp.h>
31
31
#include <net/ip.h>
32
32
#include <net/ip_fib.h>
33
+ #include <net/ip6_fib.h>
33
34
#include <net/ip6_route.h>
34
35
#include <net/rtnetlink.h>
35
36
#include <net/route.h>
@@ -57,6 +58,7 @@ struct slave_queue {
57
58
struct net_vrf {
58
59
struct slave_queue queue ;
59
60
struct rtable * rth ;
61
+ struct rt6_info * rt6 ;
60
62
u32 tb_id ;
61
63
};
62
64
@@ -104,12 +106,56 @@ static struct dst_ops vrf_dst_ops = {
104
106
.default_advmss = vrf_default_advmss ,
105
107
};
106
108
109
+ /* neighbor handling is done with actual device; do not want
110
+ * to flip skb->dev for those ndisc packets. This really fails
111
+ * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
112
+ * a start.
113
+ */
114
+ #if IS_ENABLED (CONFIG_IPV6 )
115
+ static bool check_ipv6_frame (const struct sk_buff * skb )
116
+ {
117
+ const struct ipv6hdr * ipv6h = (struct ipv6hdr * )skb -> data ;
118
+ size_t hlen = sizeof (* ipv6h );
119
+ bool rc = true;
120
+
121
+ if (skb -> len < hlen )
122
+ goto out ;
123
+
124
+ if (ipv6h -> nexthdr == NEXTHDR_ICMP ) {
125
+ const struct icmp6hdr * icmph ;
126
+
127
+ if (skb -> len < hlen + sizeof (* icmph ))
128
+ goto out ;
129
+
130
+ icmph = (struct icmp6hdr * )(skb -> data + sizeof (* ipv6h ));
131
+ switch (icmph -> icmp6_type ) {
132
+ case NDISC_ROUTER_SOLICITATION :
133
+ case NDISC_ROUTER_ADVERTISEMENT :
134
+ case NDISC_NEIGHBOUR_SOLICITATION :
135
+ case NDISC_NEIGHBOUR_ADVERTISEMENT :
136
+ case NDISC_REDIRECT :
137
+ rc = false;
138
+ break ;
139
+ }
140
+ }
141
+
142
+ out :
143
+ return rc ;
144
+ }
145
+ #else
146
+ static bool check_ipv6_frame (const struct sk_buff * skb )
147
+ {
148
+ return false;
149
+ }
150
+ #endif
151
+
107
152
static bool is_ip_rx_frame (struct sk_buff * skb )
108
153
{
109
154
switch (skb -> protocol ) {
110
155
case htons (ETH_P_IP ):
111
- case htons (ETH_P_IPV6 ):
112
156
return true;
157
+ case htons (ETH_P_IPV6 ):
158
+ return check_ipv6_frame (skb );
113
159
}
114
160
return false;
115
161
}
@@ -169,12 +215,53 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
169
215
return stats ;
170
216
}
171
217
218
+ #if IS_ENABLED (CONFIG_IPV6 )
219
+ static netdev_tx_t vrf_process_v6_outbound (struct sk_buff * skb ,
220
+ struct net_device * dev )
221
+ {
222
+ const struct ipv6hdr * iph = ipv6_hdr (skb );
223
+ struct net * net = dev_net (skb -> dev );
224
+ struct flowi6 fl6 = {
225
+ /* needed to match OIF rule */
226
+ .flowi6_oif = dev -> ifindex ,
227
+ .flowi6_iif = LOOPBACK_IFINDEX ,
228
+ .daddr = iph -> daddr ,
229
+ .saddr = iph -> saddr ,
230
+ .flowlabel = ip6_flowinfo (iph ),
231
+ .flowi6_mark = skb -> mark ,
232
+ .flowi6_proto = iph -> nexthdr ,
233
+ .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF ,
234
+ };
235
+ int ret = NET_XMIT_DROP ;
236
+ struct dst_entry * dst ;
237
+ struct dst_entry * dst_null = & net -> ipv6 .ip6_null_entry -> dst ;
238
+
239
+ dst = ip6_route_output (net , NULL , & fl6 );
240
+ if (dst == dst_null )
241
+ goto err ;
242
+
243
+ skb_dst_drop (skb );
244
+ skb_dst_set (skb , dst );
245
+
246
+ ret = ip6_local_out (net , skb -> sk , skb );
247
+ if (unlikely (net_xmit_eval (ret )))
248
+ dev -> stats .tx_errors ++ ;
249
+ else
250
+ ret = NET_XMIT_SUCCESS ;
251
+
252
+ return ret ;
253
+ err :
254
+ vrf_tx_error (dev , skb );
255
+ return NET_XMIT_DROP ;
256
+ }
257
+ #else
172
258
static netdev_tx_t vrf_process_v6_outbound (struct sk_buff * skb ,
173
259
struct net_device * dev )
174
260
{
175
261
vrf_tx_error (dev , skb );
176
262
return NET_XMIT_DROP ;
177
263
}
264
+ #endif
178
265
179
266
static int vrf_send_v4_prep (struct sk_buff * skb , struct flowi4 * fl4 ,
180
267
struct net_device * vrf_dev )
@@ -269,6 +356,157 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
269
356
return ret ;
270
357
}
271
358
359
+ #if IS_ENABLED (CONFIG_IPV6 )
360
+ static struct dst_entry * vrf_ip6_check (struct dst_entry * dst , u32 cookie )
361
+ {
362
+ return dst ;
363
+ }
364
+
365
+ static struct dst_ops vrf_dst_ops6 = {
366
+ .family = AF_INET6 ,
367
+ .local_out = ip6_local_out ,
368
+ .check = vrf_ip6_check ,
369
+ .mtu = vrf_v4_mtu ,
370
+ .destroy = vrf_dst_destroy ,
371
+ .default_advmss = vrf_default_advmss ,
372
+ };
373
+
374
+ static int init_dst_ops6_kmem_cachep (void )
375
+ {
376
+ vrf_dst_ops6 .kmem_cachep = kmem_cache_create ("vrf_ip6_dst_cache" ,
377
+ sizeof (struct rt6_info ),
378
+ 0 ,
379
+ SLAB_HWCACHE_ALIGN ,
380
+ NULL );
381
+
382
+ if (!vrf_dst_ops6 .kmem_cachep )
383
+ return - ENOMEM ;
384
+
385
+ return 0 ;
386
+ }
387
+
388
+ static void free_dst_ops6_kmem_cachep (void )
389
+ {
390
+ kmem_cache_destroy (vrf_dst_ops6 .kmem_cachep );
391
+ }
392
+
393
+ static int vrf_input6 (struct sk_buff * skb )
394
+ {
395
+ skb -> dev -> stats .rx_errors ++ ;
396
+ kfree_skb (skb );
397
+ return 0 ;
398
+ }
399
+
400
+ /* modelled after ip6_finish_output2 */
401
+ static int vrf_finish_output6 (struct net * net , struct sock * sk ,
402
+ struct sk_buff * skb )
403
+ {
404
+ struct dst_entry * dst = skb_dst (skb );
405
+ struct net_device * dev = dst -> dev ;
406
+ struct neighbour * neigh ;
407
+ struct in6_addr * nexthop ;
408
+ int ret ;
409
+
410
+ skb -> protocol = htons (ETH_P_IPV6 );
411
+ skb -> dev = dev ;
412
+
413
+ rcu_read_lock_bh ();
414
+ nexthop = rt6_nexthop ((struct rt6_info * )dst , & ipv6_hdr (skb )-> daddr );
415
+ neigh = __ipv6_neigh_lookup_noref (dst -> dev , nexthop );
416
+ if (unlikely (!neigh ))
417
+ neigh = __neigh_create (& nd_tbl , nexthop , dst -> dev , false);
418
+ if (!IS_ERR (neigh )) {
419
+ ret = dst_neigh_output (dst , neigh , skb );
420
+ rcu_read_unlock_bh ();
421
+ return ret ;
422
+ }
423
+ rcu_read_unlock_bh ();
424
+
425
+ IP6_INC_STATS (dev_net (dst -> dev ),
426
+ ip6_dst_idev (dst ), IPSTATS_MIB_OUTNOROUTES );
427
+ kfree_skb (skb );
428
+ return - EINVAL ;
429
+ }
430
+
431
+ /* modelled after ip6_output */
432
+ static int vrf_output6 (struct net * net , struct sock * sk , struct sk_buff * skb )
433
+ {
434
+ return NF_HOOK_COND (NFPROTO_IPV6 , NF_INET_POST_ROUTING ,
435
+ net , sk , skb , NULL , skb_dst (skb )-> dev ,
436
+ vrf_finish_output6 ,
437
+ !(IP6CB (skb )-> flags & IP6SKB_REROUTED ));
438
+ }
439
+
440
+ static void vrf_rt6_destroy (struct net_vrf * vrf )
441
+ {
442
+ dst_destroy (& vrf -> rt6 -> dst );
443
+ free_percpu (vrf -> rt6 -> rt6i_pcpu );
444
+ vrf -> rt6 = NULL ;
445
+ }
446
+
447
+ static int vrf_rt6_create (struct net_device * dev )
448
+ {
449
+ struct net_vrf * vrf = netdev_priv (dev );
450
+ struct dst_entry * dst ;
451
+ struct rt6_info * rt6 ;
452
+ int cpu ;
453
+ int rc = - ENOMEM ;
454
+
455
+ rt6 = dst_alloc (& vrf_dst_ops6 , dev , 0 ,
456
+ DST_OBSOLETE_NONE ,
457
+ (DST_HOST | DST_NOPOLICY | DST_NOXFRM ));
458
+ if (!rt6 )
459
+ goto out ;
460
+
461
+ dst = & rt6 -> dst ;
462
+
463
+ rt6 -> rt6i_pcpu = alloc_percpu_gfp (struct rt6_info * , GFP_KERNEL );
464
+ if (!rt6 -> rt6i_pcpu ) {
465
+ dst_destroy (dst );
466
+ goto out ;
467
+ }
468
+ for_each_possible_cpu (cpu ) {
469
+ struct rt6_info * * p = per_cpu_ptr (rt6 -> rt6i_pcpu , cpu );
470
+ * p = NULL ;
471
+ }
472
+
473
+ memset (dst + 1 , 0 , sizeof (* rt6 ) - sizeof (* dst ));
474
+
475
+ INIT_LIST_HEAD (& rt6 -> rt6i_siblings );
476
+ INIT_LIST_HEAD (& rt6 -> rt6i_uncached );
477
+
478
+ rt6 -> dst .input = vrf_input6 ;
479
+ rt6 -> dst .output = vrf_output6 ;
480
+
481
+ rt6 -> rt6i_table = fib6_get_table (dev_net (dev ), vrf -> tb_id );
482
+
483
+ atomic_set (& rt6 -> dst .__refcnt , 2 );
484
+
485
+ vrf -> rt6 = rt6 ;
486
+ rc = 0 ;
487
+ out :
488
+ return rc ;
489
+ }
490
+ #else
/* IPv6 is not built in: the helpers below are no-ops so that callers need
 * no conditional compilation of their own.
 */

/* Nothing to allocate; always succeeds. */
static int init_dst_ops6_kmem_cachep(void)
{
	return 0;
}

/* Nothing was allocated, so nothing to free. */
static void free_dst_ops6_kmem_cachep(void)
{
}

/* No IPv6 dst exists for @vrf. */
static void vrf_rt6_destroy(struct net_vrf *vrf)
{
}

/* No IPv6 dst is created; always succeeds. */
static int vrf_rt6_create(struct net_device *dev)
{
	return 0;
}
508
+ #endif
509
+
272
510
/* modelled after ip_finish_output2 */
273
511
static int vrf_finish_output (struct net * net , struct sock * sk , struct sk_buff * skb )
274
512
{
@@ -490,6 +728,7 @@ static void vrf_dev_uninit(struct net_device *dev)
490
728
struct slave * slave , * next ;
491
729
492
730
vrf_rtable_destroy (vrf );
731
+ vrf_rt6_destroy (vrf );
493
732
494
733
list_for_each_entry_safe (slave , next , head , list )
495
734
vrf_del_slave (dev , slave -> dev );
@@ -513,10 +752,15 @@ static int vrf_dev_init(struct net_device *dev)
513
752
if (!vrf -> rth )
514
753
goto out_stats ;
515
754
755
+ if (vrf_rt6_create (dev ) != 0 )
756
+ goto out_rth ;
757
+
516
758
dev -> flags = IFF_MASTER | IFF_NOARP ;
517
759
518
760
return 0 ;
519
761
762
+ out_rth :
763
+ vrf_rtable_destroy (vrf );
520
764
out_stats :
521
765
free_percpu (dev -> dstats );
522
766
dev -> dstats = NULL ;
@@ -586,10 +830,30 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
586
830
fl4 -> flowi4_scope = scope ;
587
831
}
588
832
833
+ #if IS_ENABLED (CONFIG_IPV6 )
834
+ static struct dst_entry * vrf_get_rt6_dst (const struct net_device * dev ,
835
+ const struct flowi6 * fl6 )
836
+ {
837
+ struct rt6_info * rt = NULL ;
838
+
839
+ if (!(fl6 -> flowi6_flags & FLOWI_FLAG_L3MDEV_SRC )) {
840
+ struct net_vrf * vrf = netdev_priv (dev );
841
+
842
+ rt = vrf -> rt6 ;
843
+ atomic_inc (& rt -> dst .__refcnt );
844
+ }
845
+
846
+ return (struct dst_entry * )rt ;
847
+ }
848
+ #endif
849
+
589
850
static const struct l3mdev_ops vrf_l3mdev_ops = {
590
851
.l3mdev_fib_table = vrf_fib_table ,
591
852
.l3mdev_get_rtable = vrf_get_rtable ,
592
853
.l3mdev_get_saddr = vrf_get_saddr ,
854
+ #if IS_ENABLED (CONFIG_IPV6 )
855
+ .l3mdev_get_rt6_dst = vrf_get_rt6_dst ,
856
+ #endif
593
857
};
594
858
595
859
static void vrf_get_drvinfo (struct net_device * dev ,
@@ -731,6 +995,10 @@ static int __init vrf_init_module(void)
731
995
if (!vrf_dst_ops .kmem_cachep )
732
996
return - ENOMEM ;
733
997
998
+ rc = init_dst_ops6_kmem_cachep ();
999
+ if (rc != 0 )
1000
+ goto error2 ;
1001
+
734
1002
register_netdevice_notifier (& vrf_notifier_block );
735
1003
736
1004
rc = rtnl_link_register (& vrf_link_ops );
@@ -741,6 +1009,8 @@ static int __init vrf_init_module(void)
741
1009
742
1010
error :
743
1011
unregister_netdevice_notifier (& vrf_notifier_block );
1012
+ free_dst_ops6_kmem_cachep ();
1013
+ error2 :
744
1014
kmem_cache_destroy (vrf_dst_ops .kmem_cachep );
745
1015
return rc ;
746
1016
}
@@ -750,6 +1020,7 @@ static void __exit vrf_cleanup_module(void)
750
1020
rtnl_link_unregister (& vrf_link_ops );
751
1021
unregister_netdevice_notifier (& vrf_notifier_block );
752
1022
kmem_cache_destroy (vrf_dst_ops .kmem_cachep );
1023
+ free_dst_ops6_kmem_cachep ();
753
1024
}
754
1025
755
1026
module_init (vrf_init_module );
0 commit comments