@@ -407,6 +407,69 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
 }
 
+/* We copy the packet for XDP in the following cases:
+ *
+ * 1) Packet is scattered across multiple rx buffers.
+ * 2) Headroom space is insufficient.
+ *
+ * This is inefficient but it's a temporary condition that
+ * we hit right after XDP is enabled and until queue is refilled
+ * with large buffers with sufficient headroom - so it should affect
+ * at most queue size packets.
+ * Afterwards, the conditions to enable
+ * XDP should preclude the underlying device from sending packets
+ * across multiple buffers (num_buf > 1), and we make sure buffers
+ * have enough headroom.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+				       u16 *num_buf,
+				       struct page *p,
+				       int offset,
+				       int page_off,
+				       unsigned int *len)
+{
+	struct page *page = alloc_page(GFP_ATOMIC);
+
+	if (!page)
+		return NULL;
+
+	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+	page_off += *len;
+
+	while (--*num_buf) {
+		unsigned int buflen;
+		void *buf;
+		int off;
+
+		buf = virtqueue_get_buf(rq->vq, &buflen);
+		if (unlikely(!buf))
+			goto err_buf;
+
+		p = virt_to_head_page(buf);
+		off = buf - page_address(p);
+
+		/* guard against a misconfigured or uncooperative backend that
+		 * is sending packet larger than the MTU.
+		 */
+		if ((page_off + buflen) > PAGE_SIZE) {
+			put_page(p);
+			goto err_buf;
+		}
+
+		memcpy(page_address(page) + page_off,
+		       page_address(p) + off, buflen);
+		page_off += buflen;
+		put_page(p);
+	}
+
+	/* Headroom does not contribute to packet length */
+	*len = page_off - VIRTIO_XDP_HEADROOM;
+	return page;
+err_buf:
+	__free_pages(page, 0);
+	return NULL;
+}
+
 static struct sk_buff *receive_small(struct net_device *dev,
 				     struct virtnet_info *vi,
 				     struct receive_queue *rq,
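Note: the hunk above moves xdp_linearize_page() ahead of receive_small() and gives it an explicit page_off parameter, so both the small-buffer and mergeable receive paths can copy a scattered or headroom-starved packet into a fresh page at whatever offset the XDP program needs. Below is a rough userspace illustration of the same copy-and-append idea, not driver code; struct frag, linearize() and FAKE_PAGE_SIZE are invented for this sketch.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define FAKE_PAGE_SIZE 4096

struct frag {
	const uint8_t *data;
	size_t len;
};

/* Copy all fragments into one FAKE_PAGE_SIZE buffer, leaving `headroom`
 * bytes in front, the way xdp_linearize_page() copies chained rx buffers
 * in after the XDP headroom. Returns NULL if the data does not fit.
 */
static uint8_t *linearize(const struct frag *frags, size_t nfrags,
			  size_t headroom, size_t *total_len)
{
	uint8_t *page = malloc(FAKE_PAGE_SIZE);
	size_t off = headroom;
	size_t i;

	if (!page)
		return NULL;

	for (i = 0; i < nfrags; i++) {
		/* Mirrors the (page_off + buflen) > PAGE_SIZE guard above. */
		if (off + frags[i].len > FAKE_PAGE_SIZE) {
			free(page);
			return NULL;
		}
		memcpy(page + off, frags[i].data, frags[i].len);
		off += frags[i].len;
	}

	/* Headroom does not contribute to packet length. */
	*total_len = off - headroom;
	return page;
}

int main(void)
{
	uint8_t a[100] = { 0 }, b[200] = { 0 };
	struct frag frags[] = { { a, sizeof(a) }, { b, sizeof(b) } };
	size_t len = 0;
	uint8_t *page = linearize(frags, 2, 256, &len);
	int ok = page && len == sizeof(a) + sizeof(b);

	free(page);
	return ok ? 0 : 1;
}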
@@ -415,12 +478,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
 {
 	struct sk_buff *skb;
 	struct bpf_prog *xdp_prog;
-	unsigned int xdp_headroom = virtnet_get_headroom(vi);
+	unsigned int xdp_headroom = (unsigned long)ctx;
 	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
 	unsigned int headroom = vi->hdr_len + header_offset;
 	unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct page *page = virt_to_head_page(buf);
 	unsigned int delta = 0;
+	struct page *xdp_page;
 	len -= vi->hdr_len;
 
 	rcu_read_lock();
@@ -434,6 +499,27 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 		goto err_xdp;
 
+	if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
+		int offset = buf - page_address(page) + header_offset;
+		unsigned int tlen = len + vi->hdr_len;
+		u16 num_buf = 1;
+
+		xdp_headroom = virtnet_get_headroom(vi);
+		header_offset = VIRTNET_RX_PAD + xdp_headroom;
+		headroom = vi->hdr_len + header_offset;
+		buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+		xdp_page = xdp_linearize_page(rq, &num_buf, page,
+					      offset, header_offset,
+					      &tlen);
+		if (!xdp_page)
+			goto err_xdp;
+
+		buf = page_address(xdp_page);
+		put_page(page);
+		page = xdp_page;
+	}
+
 	xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
 	xdp.data = xdp.data_hard_start + xdp_headroom;
 	xdp.data_end = xdp.data + len;
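Note: with these two hunks, receive_small() no longer assumes every posted buffer already has the headroom the current XDP program wants. The headroom the buffer was actually posted with arrives in the opaque ctx value (the (unsigned long)ctx cast above), and if it is too small the packet is copied into a freshly allocated page via xdp_linearize_page(). A minimal sketch of the value-in-pointer trick, assuming ctx simply carries the headroom as an integer; the helper names are hypothetical and not part of the driver.

#include <assert.h>

/* Hypothetical helpers mirroring how the small-buffer path treats ctx:
 * the headroom is stored as a plain integer in the pointer-sized value
 * and is never dereferenced.
 */
static inline void *headroom_to_ctx(unsigned int headroom)
{
	return (void *)(unsigned long)headroom;
}

static inline unsigned int ctx_to_headroom(void *ctx)
{
	return (unsigned long)ctx;
}

int main(void)
{
	void *ctx = headroom_to_ctx(256);

	assert(ctx_to_headroom(ctx) == 256);
	return 0;
}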
@@ -462,7 +548,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 
 	skb = build_skb(buf, buflen);
 	if (!skb) {
-		put_page(virt_to_head_page(buf));
+		put_page(page);
 		goto err;
 	}
 	skb_reserve(skb, headroom - delta);
@@ -478,7 +564,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 err_xdp:
 	rcu_read_unlock();
 	dev->stats.rx_dropped++;
-	put_page(virt_to_head_page(buf));
+	put_page(page);
 xdp_xmit:
 	return NULL;
 }
@@ -503,66 +589,6 @@ static struct sk_buff *receive_big(struct net_device *dev,
 	return NULL;
 }
 
-/* The conditions to enable XDP should preclude the underlying device from
- * sending packets across multiple buffers (num_buf > 1). However per spec
- * it does not appear to be illegal to do so but rather just against convention.
- * So in order to avoid making a system unresponsive the packets are pushed
- * into a page and the XDP program is run. This will be extremely slow and we
- * push a warning to the user to fix this as soon as possible. Fixing this may
- * require resolving the underlying hardware to determine why multiple buffers
- * are being received or simply loading the XDP program in the ingress stack
- * after the skb is built because there is no advantage to running it here
- * anymore.
- */
-static struct page *xdp_linearize_page(struct receive_queue *rq,
-				       u16 *num_buf,
-				       struct page *p,
-				       int offset,
-				       unsigned int *len)
-{
-	struct page *page = alloc_page(GFP_ATOMIC);
-	unsigned int page_off = VIRTIO_XDP_HEADROOM;
-
-	if (!page)
-		return NULL;
-
-	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
-	page_off += *len;
-
-	while (--*num_buf) {
-		unsigned int buflen;
-		void *buf;
-		int off;
-
-		buf = virtqueue_get_buf(rq->vq, &buflen);
-		if (unlikely(!buf))
-			goto err_buf;
-
-		p = virt_to_head_page(buf);
-		off = buf - page_address(p);
-
-		/* guard against a misconfigured or uncooperative backend that
-		 * is sending packet larger than the MTU.
-		 */
-		if ((page_off + buflen) > PAGE_SIZE) {
-			put_page(p);
-			goto err_buf;
-		}
-
-		memcpy(page_address(page) + page_off,
-		       page_address(p) + off, buflen);
-		page_off += buflen;
-		put_page(p);
-	}
-
-	/* Headroom does not contribute to packet length */
-	*len = page_off - VIRTIO_XDP_HEADROOM;
-	return page;
-err_buf:
-	__free_pages(page, 0);
-	return NULL;
-}
-
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
@@ -577,6 +603,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct sk_buff *head_skb, *curr_skb;
 	struct bpf_prog *xdp_prog;
 	unsigned int truesize;
+	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 
 	head_skb = NULL;
 
@@ -589,10 +616,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		u32 act;
 
 		/* This happens when rx buffer size is underestimated */
-		if (unlikely(num_buf > 1)) {
+		if (unlikely(num_buf > 1 ||
+			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
 			xdp_page = xdp_linearize_page(rq, &num_buf,
-						      page, offset, &len);
+						      page, offset,
+						      VIRTIO_XDP_HEADROOM,
+						      &len);
 			if (!xdp_page)
 				goto err_xdp;
 			offset = VIRTIO_XDP_HEADROOM;
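Note: the mergeable path applies the same idea, but its per-buffer ctx has to carry more than a single integer, so the posted headroom is recovered through mergeable_ctx_to_headroom() and linearization now triggers either on a multi-buffer packet or on insufficient headroom. Below is a hypothetical sketch of packing two small per-buffer fields into one unsigned long in the spirit of that helper; the bit layout is invented and is not claimed to match the driver's real mergeable-context encoding.

#include <assert.h>

#define HDR_SHIFT 22	/* arbitrary split, for illustration only */

/* Pack two per-buffer fields into one unsigned long "ctx"; invented layout. */
static inline unsigned long pack_ctx(unsigned int truesize, unsigned int headroom)
{
	return ((unsigned long)headroom << HDR_SHIFT) | truesize;
}

static inline unsigned int ctx_truesize(unsigned long ctx)
{
	return ctx & ((1UL << HDR_SHIFT) - 1);
}

static inline unsigned int ctx_headroom(unsigned long ctx)
{
	return ctx >> HDR_SHIFT;
}

int main(void)
{
	unsigned long ctx = pack_ctx(1536, 256);

	assert(ctx_truesize(ctx) == 1536);
	assert(ctx_headroom(ctx) == 256);
	return 0;
}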
@@ -835,7 +865,6 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
-
 	return err;
 }
 
@@ -1840,7 +1869,6 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 }
 
 static int init_vqs(struct virtnet_info *vi);
-static void _remove_vq_common(struct virtnet_info *vi);
 
 static int virtnet_restore_up(struct virtio_device *vdev)
 {
@@ -1869,39 +1897,6 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	return err;
 }
 
-static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
-{
-	struct virtio_device *dev = vi->vdev;
-	int ret;
-
-	virtio_config_disable(dev);
-	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
-	virtnet_freeze_down(dev);
-	_remove_vq_common(vi);
-
-	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
-	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
-
-	ret = virtio_finalize_features(dev);
-	if (ret)
-		goto err;
-
-	vi->xdp_queue_pairs = xdp_qp;
-	ret = virtnet_restore_up(dev);
-	if (ret)
-		goto err;
-	ret = _virtnet_set_queues(vi, curr_qp);
-	if (ret)
-		goto err;
-
-	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-	virtio_config_enable(dev);
-	return 0;
-err:
-	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	return ret;
-}
-
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 			   struct netlink_ext_ack *extack)
 {
@@ -1948,35 +1943,29 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 		return PTR_ERR(prog);
 	}
 
-	/* Changing the headroom in buffers is a disruptive operation because
-	 * existing buffers must be flushed and reallocated. This will happen
-	 * when a xdp program is initially added or xdp is disabled by removing
-	 * the xdp program resulting in number of XDP queues changing.
-	 */
-	if (vi->xdp_queue_pairs != xdp_qp) {
-		err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
-		if (err) {
-			dev_warn(&dev->dev, "XDP reset failure.\n");
-			goto virtio_reset_err;
-		}
-	}
+	/* Make sure NAPI is not using any XDP TX queues for RX. */
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		napi_disable(&vi->rq[i].napi);
 
 	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+	if (err)
+		goto err;
+	vi->xdp_queue_pairs = xdp_qp;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
 		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
 		if (old_prog)
 			bpf_prog_put(old_prog);
+		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 	}
 
 	return 0;
 
-virtio_reset_err:
-	/* On reset error do our best to unwind XDP changes inflight and return
-	 * error up to user space for resolution. The underlying reset hung on
-	 * us so not much we can do here.
-	 */
+err:
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 	if (prog)
 		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
 	return err;
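Note: instead of the old full virtio reset, virtnet_xdp_set() now quiesces RX NAPI, resizes the queue set, publishes the new program on each receive queue under RCU, and re-enables NAPI; on failure it re-enables NAPI and drops the extra program references. A userspace-style sketch of that ordering, with every function below a stand-in for the corresponding driver call:

#include <stdbool.h>
#include <stdio.h>

#define MAX_QP 4	/* stand-in for vi->max_queue_pairs */

static void napi_stop(int q)		{ printf("stop rx napi %d\n", q); }
static void napi_start(int q)		{ printf("start rx napi %d\n", q); }
static bool resize_queues(int n)	{ printf("use %d queue pairs\n", n); return true; }
static void publish_prog(int q)		{ printf("swap xdp prog on queue %d\n", q); }

static int xdp_set(int curr_qp, int xdp_qp)
{
	int i;

	/* Make sure NAPI is not using any XDP TX queue for RX. */
	for (i = 0; i < MAX_QP; i++)
		napi_stop(i);

	if (!resize_queues(curr_qp + xdp_qp))
		goto err;

	for (i = 0; i < MAX_QP; i++) {
		publish_prog(i);	/* rcu_assign_pointer() in the driver */
		napi_start(i);
	}
	return 0;

err:
	for (i = 0; i < MAX_QP; i++)
		napi_start(i);
	return -1;
}

int main(void)
{
	return xdp_set(2, 2);
}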
@@ -2622,15 +2611,6 @@ static int virtnet_probe(struct virtio_device *vdev)
 	return err;
 }
 
-static void _remove_vq_common(struct virtnet_info *vi)
-{
-	vi->vdev->config->reset(vi->vdev);
-	free_unused_bufs(vi);
-	_free_receive_bufs(vi);
-	free_receive_page_frags(vi);
-	virtnet_del_vqs(vi);
-}
-
 static void remove_vq_common(struct virtnet_info *vi)
 {
 	vi->vdev->config->reset(vi->vdev);