@@ -73,6 +73,8 @@
 #include <linux/seq_file.h>
 #include <linux/uio.h>
 #include <linux/skb_array.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 
 #include <linux/uaccess.h>
 
@@ -105,6 +107,9 @@ do { \
 } while (0)
 #endif
 
+#define TUN_HEADROOM 256
+#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD + TUN_HEADROOM)
+
 /* TUN device flags */
 
 /* IFF_ATTACH_QUEUE is never stored in device flags,
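TUN_HEADROOM reserves 256 bytes in front of each received frame, on top of the conventional NET_SKB_PAD + NET_IP_ALIGN skb headroom, so that an XDP program can later grow headers in place with bpf_xdp_adjust_head(). A minimal userspace sketch of the sizing rule enforced further down in tun_can_build_skb(), assuming typical x86-64 values for NET_SKB_PAD, the cache-line size, and sizeof(struct skb_shared_info), none of which are part of this patch:

    /* pad_math.c - hedged sketch; build with: cc pad_math.c && ./a.out */
    #include <stdio.h>

    #define NET_IP_ALIGN    2
    #define NET_SKB_PAD     64      /* typical: max(32, L1_CACHE_BYTES) */
    #define TUN_HEADROOM    256     /* reserved for bpf_xdp_adjust_head() */
    #define TUN_RX_PAD      (NET_IP_ALIGN + NET_SKB_PAD + TUN_HEADROOM)
    #define SMP_CACHE_BYTES 64
    #define SKB_DATA_ALIGN(x) (((x) + SMP_CACHE_BYTES - 1) & ~(SMP_CACHE_BYTES - 1))
    #define PAGE_SIZE       4096
    #define SHINFO_SIZE     320     /* approx. sizeof(struct skb_shared_info) */

    int main(void)
    {
            int len;

            /* Largest frame the single-page fast path can accept. */
            for (len = PAGE_SIZE; len > 0; len--)
                    if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
                        SKB_DATA_ALIGN(SHINFO_SIZE) <= PAGE_SIZE)
                            break;
            printf("max fast-path frame: %d bytes\n", len);
            return 0;
    }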
@@ -170,6 +175,7 @@ struct tun_file {
 	struct list_head next;
 	struct tun_struct *detached;
 	struct skb_array tx_array;
+	struct page_frag alloc_frag;
 };
 
 struct tun_flow_entry {
@@ -221,6 +227,7 @@ struct tun_struct {
 	u32 flow_count;
 	u32 rx_batched;
 	struct tun_pcpu_stats __percpu *pcpu_stats;
+	struct bpf_prog __rcu *xdp_prog;
 };
 
 #ifdef CONFIG_TUN_VNET_CROSS_LE
@@ -571,6 +578,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 		}
 		if (tun)
 			skb_array_cleanup(&tfile->tx_array);
+		if (tfile->alloc_frag.page)
+			put_page(tfile->alloc_frag.page);
 		sock_put(&tfile->sk);
 	}
 }
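Note the refcounting here: tun_build_skb() below takes an extra page reference (get_page()) for every buffer it carves out of alloc_frag, so this put_page() drops only the file's own hold on the current page; pages still backing in-flight skbs are freed when those skbs are consumed.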
@@ -585,6 +594,7 @@ static void tun_detach(struct tun_file *tfile, bool clean)
 static void tun_detach_all(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+	struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
 	struct tun_file *tfile, *tmp;
 	int i, n = tun->numqueues;
 
@@ -617,6 +627,9 @@ static void tun_detach_all(struct net_device *dev)
 	}
 	BUG_ON(tun->numdisabled != 0);
 
+	if (xdp_prog)
+		bpf_prog_put(xdp_prog);
+
 	if (tun->flags & IFF_PERSIST)
 		module_put(THIS_MODULE);
 }
@@ -1003,6 +1016,46 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	stats->tx_dropped = tx_dropped;
 }
 
+static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+		       struct netlink_ext_ack *extack)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+
+	old_prog = rtnl_dereference(tun->xdp_prog);
+	rcu_assign_pointer(tun->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	return 0;
+}
+
+static u32 tun_xdp_query(struct net_device *dev)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	const struct bpf_prog *xdp_prog;
+
+	xdp_prog = rtnl_dereference(tun->xdp_prog);
+	if (xdp_prog)
+		return xdp_prog->aux->id;
+
+	return 0;
+}
+
+static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return tun_xdp_set(dev, xdp->prog, xdp->extack);
+	case XDP_QUERY_PROG:
+		xdp->prog_id = tun_xdp_query(dev);
+		xdp->prog_attached = !!xdp->prog_id;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops tun_netdev_ops = {
 	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
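tun_xdp() is a standard ndo_xdp dispatcher: XDP_SETUP_PROG swaps the program pointer under RTNL and releases the old program's reference, while XDP_QUERY_PROG reports the attached program's ID. Any valid XDP program can sit behind it; a hedged minimal sketch (file, section, and function names are illustrative; built with clang -O2 -target bpf -c xdp_pass.c -o xdp_pass.o):

    /* xdp_pass.c - the smallest useful XDP program: accept every frame. */
    #include <linux/bpf.h>

    __attribute__((section("xdp"), used))
    int xdp_pass_all(struct xdp_md *ctx)
    {
            return XDP_PASS;
    }

    char _license[] __attribute__((section("license"), used)) = "GPL";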
@@ -1033,6 +1086,7 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_features_check	= passthru_features_check,
 	.ndo_set_rx_headroom	= tun_set_headroom,
 	.ndo_get_stats64	= tun_net_get_stats64,
+	.ndo_xdp		= tun_xdp,
 };
 
 static void tun_flow_init(struct tun_struct *tun)
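Only tap_netdev_ops gains the hook: the native fast path below operates on Ethernet frames, so IFF_TUN devices are deliberately left without .ndo_xdp (an attach there ends up on the generic XDP path instead). With the hook in place, a program such as the sketch above can be attached through the usual iproute2 route (device name hypothetical):

    ip link set dev tap0 xdp obj xdp_pass.o sec xdp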
@@ -1190,6 +1244,128 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
 	}
 }
 
+static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
+			      int len, int noblock, bool zerocopy)
+{
+	if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
+		return false;
+
+	if (tfile->socket.sk->sk_sndbuf != INT_MAX)
+		return false;
+
+	if (!noblock)
+		return false;
+
+	if (zerocopy)
+		return false;
+
+	if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+	    SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+		return false;
+
+	return true;
+}
+
+static struct sk_buff *tun_build_skb(struct tun_struct *tun,
+				     struct tun_file *tfile,
+				     struct iov_iter *from,
+				     struct virtio_net_hdr *hdr,
+				     int len, int *generic_xdp)
+{
+	struct page_frag *alloc_frag = &tfile->alloc_frag;
+	struct sk_buff *skb;
+	struct bpf_prog *xdp_prog;
+	int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+		     SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	unsigned int delta = 0;
+	char *buf;
+	size_t copied;
+	bool xdp_xmit = false;
+	int err;
+
+	if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+
+	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	copied = copy_page_from_iter(alloc_frag->page,
+				     alloc_frag->offset + TUN_RX_PAD,
+				     len, from);
+	if (copied != len)
+		return ERR_PTR(-EFAULT);
+
+	if (hdr->gso_type)
+		*generic_xdp = 1;
+	else
+		*generic_xdp = 0;
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(tun->xdp_prog);
+	if (xdp_prog && !*generic_xdp) {
+		struct xdp_buff xdp;
+		void *orig_data;
+		u32 act;
+
+		xdp.data_hard_start = buf;
+		xdp.data = buf + TUN_RX_PAD;
+		xdp.data_end = xdp.data + len;
+		orig_data = xdp.data;
+		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+		switch (act) {
+		case XDP_REDIRECT:
+			get_page(alloc_frag->page);
+			alloc_frag->offset += buflen;
+			err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+			if (err)
+				goto err_redirect;
+			return NULL;
+		case XDP_TX:
+			xdp_xmit = true;
+			/* fall through */
+		case XDP_PASS:
+			delta = orig_data - xdp.data;
+			break;
+		default:
+			bpf_warn_invalid_xdp_action(act);
+			/* fall through */
+		case XDP_ABORTED:
+			trace_xdp_exception(tun->dev, xdp_prog, act);
+			/* fall through */
+		case XDP_DROP:
+			goto err_xdp;
+		}
+	}
+
+	skb = build_skb(buf, buflen);
+	if (!skb) {
+		rcu_read_unlock();
+		return ERR_PTR(-ENOMEM);
+	}
+
+	skb_reserve(skb, TUN_RX_PAD - delta);
+	skb_put(skb, len + delta);
+	get_page(alloc_frag->page);
+	alloc_frag->offset += buflen;
+
+	if (xdp_xmit) {
+		skb->dev = tun->dev;
+		generic_xdp_tx(skb, xdp_prog);
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	rcu_read_unlock();
+
+	return skb;
+
+err_redirect:
+	put_page(alloc_frag->page);
+err_xdp:
+	rcu_read_unlock();
+	this_cpu_inc(tun->pcpu_stats->rx_dropped);
+	return NULL;
+}
+
 /* Get packet from user space buffer */
 static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 			    void *msg_control, struct iov_iter *from,
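Inside tun_build_skb() the program runs on the raw page fragment before any skb exists: data_hard_start points TUN_RX_PAD bytes ahead of the frame, and any move of xdp.data is folded back in via delta (skb_reserve(TUN_RX_PAD - delta), skb_put(len + delta)). A hedged sketch of a program that actually uses that headroom through bpf_xdp_adjust_head(), in the pre-libbpf helper-pointer style (names illustrative):

    /* xdp_push_tag.c - prepend 4 bytes to each frame inside the XDP headroom. */
    #include <linux/bpf.h>

    static int (*bpf_xdp_adjust_head)(void *ctx, int delta) =
            (void *)BPF_FUNC_xdp_adjust_head;

    __attribute__((section("xdp"), used))
    int xdp_push_tag(struct xdp_md *ctx)
    {
            /* Grow the frame 4 bytes at the front; fails if headroom is gone. */
            if (bpf_xdp_adjust_head(ctx, -4))
                    return XDP_DROP;

            void *data = (void *)(long)ctx->data;
            void *data_end = (void *)(long)ctx->data_end;

            if (data + 4 > data_end)        /* bounds check for the verifier */
                    return XDP_DROP;
            __builtin_memset(data, 0, 4);   /* fill in the new header bytes */
            return XDP_PASS;
    }

    char _license[] __attribute__((section("license"), used)) = "GPL";

For such a program, delta in tun_build_skb() comes out as +4, so the skb is built with 4 bytes less reserve and 4 bytes more length.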
@@ -1206,6 +1382,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	bool zerocopy = false;
 	int err;
 	u32 rxhash;
+	int generic_xdp = 1;
 
 	if (!(tun->dev->flags & IFF_UP))
 		return -EIO;
@@ -1263,30 +1440,40 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		zerocopy = true;
 	}
 
-	if (!zerocopy) {
-		copylen = len;
-		if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
-			linear = good_linear;
-		else
-			linear = tun16_to_cpu(tun, gso.hdr_len);
-	}
-
-	skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
-	if (IS_ERR(skb)) {
-		if (PTR_ERR(skb) != -EAGAIN)
+	if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+		skb = tun_build_skb(tun, tfile, from, &gso, len, &generic_xdp);
+		if (IS_ERR(skb)) {
 			this_cpu_inc(tun->pcpu_stats->rx_dropped);
-		return PTR_ERR(skb);
-	}
+			return PTR_ERR(skb);
+		}
+		if (!skb)
+			return total_len;
+	} else {
+		if (!zerocopy) {
+			copylen = len;
+			if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
+				linear = good_linear;
+			else
+				linear = tun16_to_cpu(tun, gso.hdr_len);
+		}
 
-	if (zerocopy)
-		err = zerocopy_sg_from_iter(skb, from);
-	else
-		err = skb_copy_datagram_from_iter(skb, 0, from, len);
+		skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+		if (IS_ERR(skb)) {
+			if (PTR_ERR(skb) != -EAGAIN)
+				this_cpu_inc(tun->pcpu_stats->rx_dropped);
+			return PTR_ERR(skb);
+		}
 
-	if (err) {
-		this_cpu_inc(tun->pcpu_stats->rx_dropped);
-		kfree_skb(skb);
-		return -EFAULT;
+		if (zerocopy)
+			err = zerocopy_sg_from_iter(skb, from);
+		else
+			err = skb_copy_datagram_from_iter(skb, 0, from, len);
+
+		if (err) {
+			this_cpu_inc(tun->pcpu_stats->rx_dropped);
+			kfree_skb(skb);
+			return -EFAULT;
+		}
 	}
 
 	if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
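None of this triggers unless tun_can_build_skb() agrees: tap mode only, a non-blocking write, no zerocopy (vhost msg_control), sk_sndbuf still at its INT_MAX default, and a frame that fits in one page together with its skb_shared_info. From userspace that amounts to the defaults plus O_NONBLOCK; a hedged sketch (interface name and error handling illustrative):

    /* fast_tap.c - open a tap fd whose writes can hit tun_build_skb(). */
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/if.h>
    #include <linux/if_tun.h>

    int open_fast_tap(const char *name)
    {
            struct ifreq ifr;
            /* O_NONBLOCK: tun_get_user() takes the fast path only for
             * non-blocking writes. */
            int fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);

            if (fd < 0)
                    return -1;
            memset(&ifr, 0, sizeof(ifr));
            ifr.ifr_flags = IFF_TAP | IFF_NO_PI;  /* IFF_TUN never qualifies */
            strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
            if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
                    close(fd);
                    return -1;
            }
            /* Do not shrink the send buffer with TUNSETSNDBUF: the fast
             * path also requires sk_sndbuf to stay at INT_MAX. */
            return fd;
    }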
@@ -1334,6 +1521,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	skb_reset_network_header(skb);
 	skb_probe_transport_header(skb, 0);
 
+	if (generic_xdp) {
+		struct bpf_prog *xdp_prog;
+		int ret;
+
+		rcu_read_lock();
+		xdp_prog = rcu_dereference(tun->xdp_prog);
+		if (xdp_prog) {
+			ret = do_xdp_generic(xdp_prog, skb);
+			if (ret != XDP_PASS) {
+				rcu_read_unlock();
+				return total_len;
+			}
+		}
+		rcu_read_unlock();
+	}
+
 	rxhash = __skb_get_hash_symmetric(skb);
 #ifndef CONFIG_4KSTACKS
 	tun_rx_batched(tun, tfile, skb, more);
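This is the fallback half of the scheme: tun_build_skb() sets generic_xdp whenever the virtio-net header carries GSO state, since a GSO packet cannot be run through the native hook on a single linear buffer. Such packets reach do_xdp_generic() only here, after the full skb with its GSO metadata has been assembled, and any verdict other than XDP_PASS consumes the packet (the write still reports total_len to userspace).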
@@ -2377,6 +2580,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	tfile->sk.sk_write_space = tun_sock_write_space;
 	tfile->sk.sk_sndbuf = INT_MAX;
 
+	tfile->alloc_frag.page = NULL;
+
 	file->private_data = tfile;
 	INIT_LIST_HEAD(&tfile->next);
 