Skip to content

Commit cc8f1a3

Browse files
committed
Merge branch 'tap-XDP-support'
Jason Wang says: ==================== XDP support for tap This series tries to implement XDP support for tap. Two paths were implemented: - fast path: small & non-gso packets. For performance reasons we do it at page level and use build_skb() to create the skb if necessary. - slow path: big or gso packets. We don't want to lose capability compared to generic XDP, so we export some generic XDP helpers and run the program after the skb is created. xdp1 shows about 41% improvement, xdp_redirect shows about 60% improvement. Changes from V1: - fix the race between xdp set and free - don't hold extra refcount - add XDP_REDIRECT support Please review. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents d022578 + 761876c commit cc8f1a3

File tree

3 files changed

+236
-27
lines changed

3 files changed

+236
-27
lines changed

drivers/net/tun.c

Lines changed: 226 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@
7373
#include <linux/seq_file.h>
7474
#include <linux/uio.h>
7575
#include <linux/skb_array.h>
76+
#include <linux/bpf.h>
77+
#include <linux/bpf_trace.h>
7678

7779
#include <linux/uaccess.h>
7880

@@ -105,6 +107,9 @@ do { \
105107
} while (0)
106108
#endif
107109

110+
#define TUN_HEADROOM 256
111+
#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD + TUN_HEADROOM)
112+
108113
/* TUN device flags */
109114

110115
/* IFF_ATTACH_QUEUE is never stored in device flags,
@@ -170,6 +175,7 @@ struct tun_file {
170175
struct list_head next;
171176
struct tun_struct *detached;
172177
struct skb_array tx_array;
178+
struct page_frag alloc_frag;
173179
};
174180

175181
struct tun_flow_entry {
@@ -221,6 +227,7 @@ struct tun_struct {
221227
u32 flow_count;
222228
u32 rx_batched;
223229
struct tun_pcpu_stats __percpu *pcpu_stats;
230+
struct bpf_prog __rcu *xdp_prog;
224231
};
225232

226233
#ifdef CONFIG_TUN_VNET_CROSS_LE
@@ -571,6 +578,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
571578
}
572579
if (tun)
573580
skb_array_cleanup(&tfile->tx_array);
581+
if (tfile->alloc_frag.page)
582+
put_page(tfile->alloc_frag.page);
574583
sock_put(&tfile->sk);
575584
}
576585
}
@@ -585,6 +594,7 @@ static void tun_detach(struct tun_file *tfile, bool clean)
585594
static void tun_detach_all(struct net_device *dev)
586595
{
587596
struct tun_struct *tun = netdev_priv(dev);
597+
struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
588598
struct tun_file *tfile, *tmp;
589599
int i, n = tun->numqueues;
590600

@@ -617,6 +627,9 @@ static void tun_detach_all(struct net_device *dev)
617627
}
618628
BUG_ON(tun->numdisabled != 0);
619629

630+
if (xdp_prog)
631+
bpf_prog_put(xdp_prog);
632+
620633
if (tun->flags & IFF_PERSIST)
621634
module_put(THIS_MODULE);
622635
}
@@ -1003,6 +1016,46 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
10031016
stats->tx_dropped = tx_dropped;
10041017
}
10051018

1019+
static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1020+
struct netlink_ext_ack *extack)
1021+
{
1022+
struct tun_struct *tun = netdev_priv(dev);
1023+
struct bpf_prog *old_prog;
1024+
1025+
old_prog = rtnl_dereference(tun->xdp_prog);
1026+
rcu_assign_pointer(tun->xdp_prog, prog);
1027+
if (old_prog)
1028+
bpf_prog_put(old_prog);
1029+
1030+
return 0;
1031+
}
1032+
1033+
static u32 tun_xdp_query(struct net_device *dev)
1034+
{
1035+
struct tun_struct *tun = netdev_priv(dev);
1036+
const struct bpf_prog *xdp_prog;
1037+
1038+
xdp_prog = rtnl_dereference(tun->xdp_prog);
1039+
if (xdp_prog)
1040+
return xdp_prog->aux->id;
1041+
1042+
return 0;
1043+
}
1044+
1045+
static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
1046+
{
1047+
switch (xdp->command) {
1048+
case XDP_SETUP_PROG:
1049+
return tun_xdp_set(dev, xdp->prog, xdp->extack);
1050+
case XDP_QUERY_PROG:
1051+
xdp->prog_id = tun_xdp_query(dev);
1052+
xdp->prog_attached = !!xdp->prog_id;
1053+
return 0;
1054+
default:
1055+
return -EINVAL;
1056+
}
1057+
}
1058+
10061059
static const struct net_device_ops tun_netdev_ops = {
10071060
.ndo_uninit = tun_net_uninit,
10081061
.ndo_open = tun_net_open,
@@ -1033,6 +1086,7 @@ static const struct net_device_ops tap_netdev_ops = {
10331086
.ndo_features_check = passthru_features_check,
10341087
.ndo_set_rx_headroom = tun_set_headroom,
10351088
.ndo_get_stats64 = tun_net_get_stats64,
1089+
.ndo_xdp = tun_xdp,
10361090
};
10371091

10381092
static void tun_flow_init(struct tun_struct *tun)
@@ -1190,6 +1244,128 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
11901244
}
11911245
}
11921246

1247+
static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
1248+
int len, int noblock, bool zerocopy)
1249+
{
1250+
if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
1251+
return false;
1252+
1253+
if (tfile->socket.sk->sk_sndbuf != INT_MAX)
1254+
return false;
1255+
1256+
if (!noblock)
1257+
return false;
1258+
1259+
if (zerocopy)
1260+
return false;
1261+
1262+
if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1263+
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
1264+
return false;
1265+
1266+
return true;
1267+
}
1268+
1269+
static struct sk_buff *tun_build_skb(struct tun_struct *tun,
1270+
struct tun_file *tfile,
1271+
struct iov_iter *from,
1272+
struct virtio_net_hdr *hdr,
1273+
int len, int *generic_xdp)
1274+
{
1275+
struct page_frag *alloc_frag = &tfile->alloc_frag;
1276+
struct sk_buff *skb;
1277+
struct bpf_prog *xdp_prog;
1278+
int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1279+
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1280+
unsigned int delta = 0;
1281+
char *buf;
1282+
size_t copied;
1283+
bool xdp_xmit = false;
1284+
int err;
1285+
1286+
if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
1287+
return ERR_PTR(-ENOMEM);
1288+
1289+
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1290+
copied = copy_page_from_iter(alloc_frag->page,
1291+
alloc_frag->offset + TUN_RX_PAD,
1292+
len, from);
1293+
if (copied != len)
1294+
return ERR_PTR(-EFAULT);
1295+
1296+
if (hdr->gso_type)
1297+
*generic_xdp = 1;
1298+
else
1299+
*generic_xdp = 0;
1300+
1301+
rcu_read_lock();
1302+
xdp_prog = rcu_dereference(tun->xdp_prog);
1303+
if (xdp_prog && !*generic_xdp) {
1304+
struct xdp_buff xdp;
1305+
void *orig_data;
1306+
u32 act;
1307+
1308+
xdp.data_hard_start = buf;
1309+
xdp.data = buf + TUN_RX_PAD;
1310+
xdp.data_end = xdp.data + len;
1311+
orig_data = xdp.data;
1312+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
1313+
1314+
switch (act) {
1315+
case XDP_REDIRECT:
1316+
get_page(alloc_frag->page);
1317+
alloc_frag->offset += buflen;
1318+
err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
1319+
if (err)
1320+
goto err_redirect;
1321+
return NULL;
1322+
case XDP_TX:
1323+
xdp_xmit = true;
1324+
/* fall through */
1325+
case XDP_PASS:
1326+
delta = orig_data - xdp.data;
1327+
break;
1328+
default:
1329+
bpf_warn_invalid_xdp_action(act);
1330+
/* fall through */
1331+
case XDP_ABORTED:
1332+
trace_xdp_exception(tun->dev, xdp_prog, act);
1333+
/* fall through */
1334+
case XDP_DROP:
1335+
goto err_xdp;
1336+
}
1337+
}
1338+
1339+
skb = build_skb(buf, buflen);
1340+
if (!skb) {
1341+
rcu_read_unlock();
1342+
return ERR_PTR(-ENOMEM);
1343+
}
1344+
1345+
skb_reserve(skb, TUN_RX_PAD - delta);
1346+
skb_put(skb, len + delta);
1347+
get_page(alloc_frag->page);
1348+
alloc_frag->offset += buflen;
1349+
1350+
if (xdp_xmit) {
1351+
skb->dev = tun->dev;
1352+
generic_xdp_tx(skb, xdp_prog);
1353+
rcu_read_lock();
1354+
return NULL;
1355+
}
1356+
1357+
rcu_read_unlock();
1358+
1359+
return skb;
1360+
1361+
err_redirect:
1362+
put_page(alloc_frag->page);
1363+
err_xdp:
1364+
rcu_read_unlock();
1365+
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1366+
return NULL;
1367+
}
1368+
11931369
/* Get packet from user space buffer */
11941370
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
11951371
void *msg_control, struct iov_iter *from,
@@ -1206,6 +1382,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
12061382
bool zerocopy = false;
12071383
int err;
12081384
u32 rxhash;
1385+
int generic_xdp = 1;
12091386

12101387
if (!(tun->dev->flags & IFF_UP))
12111388
return -EIO;
@@ -1263,30 +1440,40 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
12631440
zerocopy = true;
12641441
}
12651442

1266-
if (!zerocopy) {
1267-
copylen = len;
1268-
if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
1269-
linear = good_linear;
1270-
else
1271-
linear = tun16_to_cpu(tun, gso.hdr_len);
1272-
}
1273-
1274-
skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
1275-
if (IS_ERR(skb)) {
1276-
if (PTR_ERR(skb) != -EAGAIN)
1443+
if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
1444+
skb = tun_build_skb(tun, tfile, from, &gso, len, &generic_xdp);
1445+
if (IS_ERR(skb)) {
12771446
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1278-
return PTR_ERR(skb);
1279-
}
1447+
return PTR_ERR(skb);
1448+
}
1449+
if (!skb)
1450+
return total_len;
1451+
} else {
1452+
if (!zerocopy) {
1453+
copylen = len;
1454+
if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
1455+
linear = good_linear;
1456+
else
1457+
linear = tun16_to_cpu(tun, gso.hdr_len);
1458+
}
12801459

1281-
if (zerocopy)
1282-
err = zerocopy_sg_from_iter(skb, from);
1283-
else
1284-
err = skb_copy_datagram_from_iter(skb, 0, from, len);
1460+
skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
1461+
if (IS_ERR(skb)) {
1462+
if (PTR_ERR(skb) != -EAGAIN)
1463+
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1464+
return PTR_ERR(skb);
1465+
}
12851466

1286-
if (err) {
1287-
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1288-
kfree_skb(skb);
1289-
return -EFAULT;
1467+
if (zerocopy)
1468+
err = zerocopy_sg_from_iter(skb, from);
1469+
else
1470+
err = skb_copy_datagram_from_iter(skb, 0, from, len);
1471+
1472+
if (err) {
1473+
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1474+
kfree_skb(skb);
1475+
return -EFAULT;
1476+
}
12901477
}
12911478

12921479
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
@@ -1334,6 +1521,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
13341521
skb_reset_network_header(skb);
13351522
skb_probe_transport_header(skb, 0);
13361523

1524+
if (generic_xdp) {
1525+
struct bpf_prog *xdp_prog;
1526+
int ret;
1527+
1528+
rcu_read_lock();
1529+
xdp_prog = rcu_dereference(tun->xdp_prog);
1530+
if (xdp_prog) {
1531+
ret = do_xdp_generic(xdp_prog, skb);
1532+
if (ret != XDP_PASS) {
1533+
rcu_read_unlock();
1534+
return total_len;
1535+
}
1536+
}
1537+
rcu_read_unlock();
1538+
}
1539+
13371540
rxhash = __skb_get_hash_symmetric(skb);
13381541
#ifndef CONFIG_4KSTACKS
13391542
tun_rx_batched(tun, tfile, skb, more);
@@ -2377,6 +2580,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
23772580
tfile->sk.sk_write_space = tun_sock_write_space;
23782581
tfile->sk.sk_sndbuf = INT_MAX;
23792582

2583+
tfile->alloc_frag.page = NULL;
2584+
23802585
file->private_data = tfile;
23812586
INIT_LIST_HEAD(&tfile->next);
23822587

include/linux/netdevice.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3243,6 +3243,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
32433243
__dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
32443244
}
32453245

3246+
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
3247+
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
32463248
int netif_rx(struct sk_buff *skb);
32473249
int netif_rx_ni(struct sk_buff *skb);
32483250
int netif_receive_skb(struct sk_buff *skb);

0 commit comments

Comments
 (0)