Skip to content

Commit 4941d47

Browse files
jasowang authored and davem330 committed
virtio-net: do not reset during XDP set
We currently reset the device during XDP set, the main reason is that we allocate more headroom with XDP (for header adjustment). This works but causes network downtime for users. Previous patches encoded the headroom in the buffer context, this makes it possible to detect the case where a buffer with headroom insufficient for XDP is added to the queue and XDP is enabled afterwards. Upon detection, we handle this case by copying the packet (slow, but it's a temporary condition). Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 192f68c commit 4941d47

File tree

1 file changed

+106
-126
lines changed

1 file changed

+106
-126
lines changed

drivers/net/virtio_net.c

Lines changed: 106 additions & 126 deletions
Original file line number | Diff line number | Diff line change
@@ -407,6 +407,69 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
407407
return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
408408
}
409409

410+
/* We copy the packet for XDP in the following cases:
411+
*
412+
* 1) Packet is scattered across multiple rx buffers.
413+
* 2) Headroom space is insufficient.
414+
*
415+
* This is inefficient but it's a temporary condition that
416+
* we hit right after XDP is enabled and until queue is refilled
417+
* with large buffers with sufficient headroom - so it should affect
418+
* at most queue size packets.
419+
* Afterwards, the conditions to enable
420+
* XDP should preclude the underlying device from sending packets
421+
* across multiple buffers (num_buf > 1), and we make sure buffers
422+
* have enough headroom.
423+
*/
424+
static struct page *xdp_linearize_page(struct receive_queue *rq,
425+
u16 *num_buf,
426+
struct page *p,
427+
int offset,
428+
int page_off,
429+
unsigned int *len)
430+
{
431+
struct page *page = alloc_page(GFP_ATOMIC);
432+
433+
if (!page)
434+
return NULL;
435+
436+
memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
437+
page_off += *len;
438+
439+
while (--*num_buf) {
440+
unsigned int buflen;
441+
void *buf;
442+
int off;
443+
444+
buf = virtqueue_get_buf(rq->vq, &buflen);
445+
if (unlikely(!buf))
446+
goto err_buf;
447+
448+
p = virt_to_head_page(buf);
449+
off = buf - page_address(p);
450+
451+
/* guard against a misconfigured or uncooperative backend that
452+
* is sending packet larger than the MTU.
453+
*/
454+
if ((page_off + buflen) > PAGE_SIZE) {
455+
put_page(p);
456+
goto err_buf;
457+
}
458+
459+
memcpy(page_address(page) + page_off,
460+
page_address(p) + off, buflen);
461+
page_off += buflen;
462+
put_page(p);
463+
}
464+
465+
/* Headroom does not contribute to packet length */
466+
*len = page_off - VIRTIO_XDP_HEADROOM;
467+
return page;
468+
err_buf:
469+
__free_pages(page, 0);
470+
return NULL;
471+
}
472+
410473
static struct sk_buff *receive_small(struct net_device *dev,
411474
struct virtnet_info *vi,
412475
struct receive_queue *rq,
@@ -415,12 +478,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
415478
{
416479
struct sk_buff *skb;
417480
struct bpf_prog *xdp_prog;
418-
unsigned int xdp_headroom = virtnet_get_headroom(vi);
481+
unsigned int xdp_headroom = (unsigned long)ctx;
419482
unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
420483
unsigned int headroom = vi->hdr_len + header_offset;
421484
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
422485
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
486+
struct page *page = virt_to_head_page(buf);
423487
unsigned int delta = 0;
488+
struct page *xdp_page;
424489
len -= vi->hdr_len;
425490

426491
rcu_read_lock();
@@ -434,6 +499,27 @@ static struct sk_buff *receive_small(struct net_device *dev,
434499
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
435500
goto err_xdp;
436501

502+
if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
503+
int offset = buf - page_address(page) + header_offset;
504+
unsigned int tlen = len + vi->hdr_len;
505+
u16 num_buf = 1;
506+
507+
xdp_headroom = virtnet_get_headroom(vi);
508+
header_offset = VIRTNET_RX_PAD + xdp_headroom;
509+
headroom = vi->hdr_len + header_offset;
510+
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
511+
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
512+
xdp_page = xdp_linearize_page(rq, &num_buf, page,
513+
offset, header_offset,
514+
&tlen);
515+
if (!xdp_page)
516+
goto err_xdp;
517+
518+
buf = page_address(xdp_page);
519+
put_page(page);
520+
page = xdp_page;
521+
}
522+
437523
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
438524
xdp.data = xdp.data_hard_start + xdp_headroom;
439525
xdp.data_end = xdp.data + len;
@@ -462,7 +548,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
462548

463549
skb = build_skb(buf, buflen);
464550
if (!skb) {
465-
put_page(virt_to_head_page(buf));
551+
put_page(page);
466552
goto err;
467553
}
468554
skb_reserve(skb, headroom - delta);
@@ -478,7 +564,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
478564
err_xdp:
479565
rcu_read_unlock();
480566
dev->stats.rx_dropped++;
481-
put_page(virt_to_head_page(buf));
567+
put_page(page);
482568
xdp_xmit:
483569
return NULL;
484570
}
@@ -503,66 +589,6 @@ static struct sk_buff *receive_big(struct net_device *dev,
503589
return NULL;
504590
}
505591

506-
/* The conditions to enable XDP should preclude the underlying device from
507-
* sending packets across multiple buffers (num_buf > 1). However per spec
508-
* it does not appear to be illegal to do so but rather just against convention.
509-
* So in order to avoid making a system unresponsive the packets are pushed
510-
* into a page and the XDP program is run. This will be extremely slow and we
511-
* push a warning to the user to fix this as soon as possible. Fixing this may
512-
* require resolving the underlying hardware to determine why multiple buffers
513-
* are being received or simply loading the XDP program in the ingress stack
514-
* after the skb is built because there is no advantage to running it here
515-
* anymore.
516-
*/
517-
static struct page *xdp_linearize_page(struct receive_queue *rq,
518-
u16 *num_buf,
519-
struct page *p,
520-
int offset,
521-
unsigned int *len)
522-
{
523-
struct page *page = alloc_page(GFP_ATOMIC);
524-
unsigned int page_off = VIRTIO_XDP_HEADROOM;
525-
526-
if (!page)
527-
return NULL;
528-
529-
memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
530-
page_off += *len;
531-
532-
while (--*num_buf) {
533-
unsigned int buflen;
534-
void *buf;
535-
int off;
536-
537-
buf = virtqueue_get_buf(rq->vq, &buflen);
538-
if (unlikely(!buf))
539-
goto err_buf;
540-
541-
p = virt_to_head_page(buf);
542-
off = buf - page_address(p);
543-
544-
/* guard against a misconfigured or uncooperative backend that
545-
* is sending packet larger than the MTU.
546-
*/
547-
if ((page_off + buflen) > PAGE_SIZE) {
548-
put_page(p);
549-
goto err_buf;
550-
}
551-
552-
memcpy(page_address(page) + page_off,
553-
page_address(p) + off, buflen);
554-
page_off += buflen;
555-
put_page(p);
556-
}
557-
558-
/* Headroom does not contribute to packet length */
559-
*len = page_off - VIRTIO_XDP_HEADROOM;
560-
return page;
561-
err_buf:
562-
__free_pages(page, 0);
563-
return NULL;
564-
}
565-
566592
static struct sk_buff *receive_mergeable(struct net_device *dev,
567593
struct virtnet_info *vi,
568594
struct receive_queue *rq,
@@ -577,6 +603,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
577603
struct sk_buff *head_skb, *curr_skb;
578604
struct bpf_prog *xdp_prog;
579605
unsigned int truesize;
606+
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
580607

581608
head_skb = NULL;
582609

@@ -589,10 +616,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
589616
u32 act;
590617

591618
/* This happens when rx buffer size is underestimated */
592-
if (unlikely(num_buf > 1)) {
619+
if (unlikely(num_buf > 1 ||
620+
headroom < virtnet_get_headroom(vi))) {
593621
/* linearize data for XDP */
594622
xdp_page = xdp_linearize_page(rq, &num_buf,
595-
page, offset, &len);
623+
page, offset,
624+
VIRTIO_XDP_HEADROOM,
625+
&len);
596626
if (!xdp_page)
597627
goto err_xdp;
598628
offset = VIRTIO_XDP_HEADROOM;
@@ -835,7 +865,6 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
835865
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
836866
if (err < 0)
837867
put_page(virt_to_head_page(buf));
838-
839868
return err;
840869
}
841870

@@ -1840,7 +1869,6 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
18401869
}
18411870

18421871
static int init_vqs(struct virtnet_info *vi);
1843-
static void _remove_vq_common(struct virtnet_info *vi);
18441872

18451873
static int virtnet_restore_up(struct virtio_device *vdev)
18461874
{
@@ -1869,39 +1897,6 @@ static int virtnet_restore_up(struct virtio_device *vdev)
18691897
return err;
18701898
}
18711899

1872-
static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
1873-
{
1874-
struct virtio_device *dev = vi->vdev;
1875-
int ret;
1876-
1877-
virtio_config_disable(dev);
1878-
dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
1879-
virtnet_freeze_down(dev);
1880-
_remove_vq_common(vi);
1881-
1882-
virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
1883-
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
1884-
1885-
ret = virtio_finalize_features(dev);
1886-
if (ret)
1887-
goto err;
1888-
1889-
vi->xdp_queue_pairs = xdp_qp;
1890-
ret = virtnet_restore_up(dev);
1891-
if (ret)
1892-
goto err;
1893-
ret = _virtnet_set_queues(vi, curr_qp);
1894-
if (ret)
1895-
goto err;
1896-
1897-
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
1898-
virtio_config_enable(dev);
1899-
return 0;
1900-
err:
1901-
virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
1902-
return ret;
1903-
}
1904-
19051900
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
19061901
struct netlink_ext_ack *extack)
19071902
{
@@ -1948,35 +1943,29 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
19481943
return PTR_ERR(prog);
19491944
}
19501945

1951-
/* Changing the headroom in buffers is a disruptive operation because
1952-
* existing buffers must be flushed and reallocated. This will happen
1953-
* when a xdp program is initially added or xdp is disabled by removing
1954-
* the xdp program resulting in number of XDP queues changing.
1955-
*/
1956-
if (vi->xdp_queue_pairs != xdp_qp) {
1957-
err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
1958-
if (err) {
1959-
dev_warn(&dev->dev, "XDP reset failure.\n");
1960-
goto virtio_reset_err;
1961-
}
1962-
}
1946+
/* Make sure NAPI is not using any XDP TX queues for RX. */
1947+
for (i = 0; i < vi->max_queue_pairs; i++)
1948+
napi_disable(&vi->rq[i].napi);
19631949

19641950
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
1951+
err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
1952+
if (err)
1953+
goto err;
1954+
vi->xdp_queue_pairs = xdp_qp;
19651955

19661956
for (i = 0; i < vi->max_queue_pairs; i++) {
19671957
old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
19681958
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
19691959
if (old_prog)
19701960
bpf_prog_put(old_prog);
1961+
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
19711962
}
19721963

19731964
return 0;
19741965

1975-
virtio_reset_err:
1976-
/* On reset error do our best to unwind XDP changes inflight and return
1977-
* error up to user space for resolution. The underlying reset hung on
1978-
* us so not much we can do here.
1979-
*/
1966+
err:
1967+
for (i = 0; i < vi->max_queue_pairs; i++)
1968+
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
19801969
if (prog)
19811970
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
19821971
return err;
@@ -2622,15 +2611,6 @@ static int virtnet_probe(struct virtio_device *vdev)
26222611
return err;
26232612
}
26242613

2625-
static void _remove_vq_common(struct virtnet_info *vi)
2626-
{
2627-
vi->vdev->config->reset(vi->vdev);
2628-
free_unused_bufs(vi);
2629-
_free_receive_bufs(vi);
2630-
free_receive_page_frags(vi);
2631-
virtnet_del_vqs(vi);
2632-
}
2633-
26342614
static void remove_vq_common(struct virtnet_info *vi)
26352615
{
26362616
vi->vdev->config->reset(vi->vdev);

0 commit comments

Comments (0)