Skip to content

Commit 66ccbc9

Browse files
jasowang
authored and davem330 committed
tap: use build_skb() for small packet
In the past we used tun_alloc_skb(), which calls sock_alloc_send_pskb(), to allocate skbs. This socket-based method is not suitable for high-speed userspace such as virtualization, which usually: - ignores sk_sndbuf (INT_MAX) and expects to receive the packet as fast as possible - doesn't want to be blocked at sendmsg() To eliminate the above overheads, this patch tries to use build_skb() for small packets. We will do this only when the following conditions are all met: - TAP instead of TUN - sk_sndbuf is INT_MAX - the caller doesn't want to be blocked - zerocopy is not used - the packet size is small enough to use build_skb() Pktgen from guest to host shows ~11% improvement for rx pps of tap: Before: ~1.70Mpps After : ~1.88Mpps What's more important, this makes it possible to implement XDP for tap before creating skbs. Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent d022578 commit 66ccbc9

File tree

1 file changed

+91
-21
lines changed

1 file changed

+91
-21
lines changed

drivers/net/tun.c

Lines changed: 91 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,8 @@ do { \
105105
} while (0)
106106
#endif
107107

108+
#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
109+
108110
/* TUN device flags */
109111

110112
/* IFF_ATTACH_QUEUE is never stored in device flags,
@@ -170,6 +172,7 @@ struct tun_file {
170172
struct list_head next;
171173
struct tun_struct *detached;
172174
struct skb_array tx_array;
175+
struct page_frag alloc_frag;
173176
};
174177

175178
struct tun_flow_entry {
@@ -571,6 +574,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
571574
}
572575
if (tun)
573576
skb_array_cleanup(&tfile->tx_array);
577+
if (tfile->alloc_frag.page)
578+
put_page(tfile->alloc_frag.page);
574579
sock_put(&tfile->sk);
575580
}
576581
}
@@ -1190,6 +1195,61 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
11901195
}
11911196
}
11921197

1198+
static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
1199+
int len, int noblock, bool zerocopy)
1200+
{
1201+
if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
1202+
return false;
1203+
1204+
if (tfile->socket.sk->sk_sndbuf != INT_MAX)
1205+
return false;
1206+
1207+
if (!noblock)
1208+
return false;
1209+
1210+
if (zerocopy)
1211+
return false;
1212+
1213+
if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1214+
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
1215+
return false;
1216+
1217+
return true;
1218+
}
1219+
1220+
static struct sk_buff *tun_build_skb(struct tun_file *tfile,
1221+
struct iov_iter *from,
1222+
int len)
1223+
{
1224+
struct page_frag *alloc_frag = &tfile->alloc_frag;
1225+
struct sk_buff *skb;
1226+
int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1227+
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1228+
char *buf;
1229+
size_t copied;
1230+
1231+
if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
1232+
return ERR_PTR(-ENOMEM);
1233+
1234+
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1235+
copied = copy_page_from_iter(alloc_frag->page,
1236+
alloc_frag->offset + TUN_RX_PAD,
1237+
len, from);
1238+
if (copied != len)
1239+
return ERR_PTR(-EFAULT);
1240+
1241+
skb = build_skb(buf, buflen);
1242+
if (!skb)
1243+
return ERR_PTR(-ENOMEM);
1244+
1245+
skb_reserve(skb, TUN_RX_PAD);
1246+
skb_put(skb, len);
1247+
get_page(alloc_frag->page);
1248+
alloc_frag->offset += buflen;
1249+
1250+
return skb;
1251+
}
1252+
11931253
/* Get packet from user space buffer */
11941254
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
11951255
void *msg_control, struct iov_iter *from,
@@ -1263,30 +1323,38 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
12631323
zerocopy = true;
12641324
}
12651325

1266-
if (!zerocopy) {
1267-
copylen = len;
1268-
if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
1269-
linear = good_linear;
1270-
else
1271-
linear = tun16_to_cpu(tun, gso.hdr_len);
1272-
}
1273-
1274-
skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
1275-
if (IS_ERR(skb)) {
1276-
if (PTR_ERR(skb) != -EAGAIN)
1326+
if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
1327+
skb = tun_build_skb(tfile, from, len);
1328+
if (IS_ERR(skb)) {
12771329
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1278-
return PTR_ERR(skb);
1279-
}
1330+
return PTR_ERR(skb);
1331+
}
1332+
} else {
1333+
if (!zerocopy) {
1334+
copylen = len;
1335+
if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
1336+
linear = good_linear;
1337+
else
1338+
linear = tun16_to_cpu(tun, gso.hdr_len);
1339+
}
12801340

1281-
if (zerocopy)
1282-
err = zerocopy_sg_from_iter(skb, from);
1283-
else
1284-
err = skb_copy_datagram_from_iter(skb, 0, from, len);
1341+
skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
1342+
if (IS_ERR(skb)) {
1343+
if (PTR_ERR(skb) != -EAGAIN)
1344+
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1345+
return PTR_ERR(skb);
1346+
}
12851347

1286-
if (err) {
1287-
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1288-
kfree_skb(skb);
1289-
return -EFAULT;
1348+
if (zerocopy)
1349+
err = zerocopy_sg_from_iter(skb, from);
1350+
else
1351+
err = skb_copy_datagram_from_iter(skb, 0, from, len);
1352+
1353+
if (err) {
1354+
this_cpu_inc(tun->pcpu_stats->rx_dropped);
1355+
kfree_skb(skb);
1356+
return -EFAULT;
1357+
}
12901358
}
12911359

12921360
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
@@ -2377,6 +2445,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
23772445
tfile->sk.sk_write_space = tun_sock_write_space;
23782446
tfile->sk.sk_sndbuf = INT_MAX;
23792447

2448+
tfile->alloc_frag.page = NULL;
2449+
23802450
file->private_data = tfile;
23812451
INIT_LIST_HEAD(&tfile->next);
23822452

0 commit comments

Comments
 (0)