Skip to content

Commit f744c4b

Browse files
committed
Merge branch 'vhost_net-Avoid-vq-kicks-during-busyloop'
Toshiaki Makita says: ==================== vhost_net: Avoid vq kicks during busyloop Under heavy load vhost tx busypoll tends not to suppress vq kicks, which causes poor guest tx performance. The detailed scenario is described in the commit log of patch 2. Rx seems not to have that serious problem, but for consistency I made a similar change on rx to avoid rx wakeups (patch 3). Additionally, patch 4 is to avoid rx kicks under heavy load during busypoll. Tx performance is greatly improved by this change. I don't see notable performance change on rx with this series though. Performance numbers (tx): - Bulk transfer from guest to external physical server. [Guest]->vhost_net->tap--(XDP_REDIRECT)-->i40e --(wire)--> [Server] - Set 10us busypoll. - Guest disables checksum and TSO because of host XDP. - Measured single flow Mbps by netperf, and kicks by perf kvm stat (EPT_MISCONFIG event). Before After Mbps kicks/s Mbps kicks/s UDP_STREAM 1472byte 247758 27 Send 3645.37 6958.10 Recv 3588.56 6958.10 1byte 9865 37 Send 4.34 5.43 Recv 4.17 5.26 TCP_STREAM 8801.03 45794 9592.77 2884 v2: - Split patches into 3 parts (renaming variables, tx-kick fix, rx-wakeup fix). - Avoid rx-kicks too (patch 4). - Don't memorize endtime as it is not needed for now. ==================== Acked-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents e7e3728 + 6369fec commit f744c4b

File tree

1 file changed

+60
-35
lines changed

1 file changed

+60
-35
lines changed

drivers/vhost/net.c

Lines changed: 60 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -396,13 +396,10 @@ static inline unsigned long busy_clock(void)
396396
return local_clock() >> 10;
397397
}
398398

399-
static bool vhost_can_busy_poll(struct vhost_dev *dev,
400-
unsigned long endtime)
399+
static bool vhost_can_busy_poll(unsigned long endtime)
401400
{
402-
return likely(!need_resched()) &&
403-
likely(!time_after(busy_clock(), endtime)) &&
404-
likely(!signal_pending(current)) &&
405-
!vhost_has_work(dev);
401+
return likely(!need_resched() && !time_after(busy_clock(), endtime) &&
402+
!signal_pending(current));
406403
}
407404

408405
static void vhost_net_disable_vq(struct vhost_net *n,
@@ -434,7 +431,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
434431
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
435432
struct vhost_virtqueue *vq,
436433
struct iovec iov[], unsigned int iov_size,
437-
unsigned int *out_num, unsigned int *in_num)
434+
unsigned int *out_num, unsigned int *in_num,
435+
bool *busyloop_intr)
438436
{
439437
unsigned long uninitialized_var(endtime);
440438
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
@@ -443,9 +441,15 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
443441
if (r == vq->num && vq->busyloop_timeout) {
444442
preempt_disable();
445443
endtime = busy_clock() + vq->busyloop_timeout;
446-
while (vhost_can_busy_poll(vq->dev, endtime) &&
447-
vhost_vq_avail_empty(vq->dev, vq))
444+
while (vhost_can_busy_poll(endtime)) {
445+
if (vhost_has_work(vq->dev)) {
446+
*busyloop_intr = true;
447+
break;
448+
}
449+
if (!vhost_vq_avail_empty(vq->dev, vq))
450+
break;
448451
cpu_relax();
452+
}
449453
preempt_enable();
450454
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
451455
out_num, in_num, NULL, NULL);
@@ -501,20 +505,24 @@ static void handle_tx(struct vhost_net *net)
501505
zcopy = nvq->ubufs;
502506

503507
for (;;) {
508+
bool busyloop_intr;
509+
504510
/* Release DMAs done buffers first */
505511
if (zcopy)
506512
vhost_zerocopy_signal_used(net, vq);
507513

508-
514+
busyloop_intr = false;
509515
head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
510516
ARRAY_SIZE(vq->iov),
511-
&out, &in);
517+
&out, &in, &busyloop_intr);
512518
/* On error, stop handling until the next kick. */
513519
if (unlikely(head < 0))
514520
break;
515521
/* Nothing new? Wait for eventfd to tell us they refilled. */
516522
if (head == vq->num) {
517-
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
523+
if (unlikely(busyloop_intr)) {
524+
vhost_poll_queue(&vq->poll);
525+
} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
518526
vhost_disable_notify(&net->dev, vq);
519527
continue;
520528
}
@@ -645,41 +653,50 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
645653
nvq->done_idx = 0;
646654
}
647655

648-
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
656+
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
657+
bool *busyloop_intr)
649658
{
650-
struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
651-
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
652-
struct vhost_virtqueue *vq = &nvq->vq;
659+
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
660+
struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
661+
struct vhost_virtqueue *rvq = &rnvq->vq;
662+
struct vhost_virtqueue *tvq = &tnvq->vq;
653663
unsigned long uninitialized_var(endtime);
654-
int len = peek_head_len(rvq, sk);
664+
int len = peek_head_len(rnvq, sk);
655665

656-
if (!len && vq->busyloop_timeout) {
666+
if (!len && tvq->busyloop_timeout) {
657667
/* Flush batched heads first */
658-
vhost_rx_signal_used(rvq);
668+
vhost_rx_signal_used(rnvq);
659669
/* Both tx vq and rx socket were polled here */
660-
mutex_lock_nested(&vq->mutex, 1);
661-
vhost_disable_notify(&net->dev, vq);
670+
mutex_lock_nested(&tvq->mutex, 1);
671+
vhost_disable_notify(&net->dev, tvq);
662672

663673
preempt_disable();
664-
endtime = busy_clock() + vq->busyloop_timeout;
674+
endtime = busy_clock() + tvq->busyloop_timeout;
665675

666-
while (vhost_can_busy_poll(&net->dev, endtime) &&
667-
!sk_has_rx_data(sk) &&
668-
vhost_vq_avail_empty(&net->dev, vq))
676+
while (vhost_can_busy_poll(endtime)) {
677+
if (vhost_has_work(&net->dev)) {
678+
*busyloop_intr = true;
679+
break;
680+
}
681+
if ((sk_has_rx_data(sk) &&
682+
!vhost_vq_avail_empty(&net->dev, rvq)) ||
683+
!vhost_vq_avail_empty(&net->dev, tvq))
684+
break;
669685
cpu_relax();
686+
}
670687

671688
preempt_enable();
672689

673-
if (!vhost_vq_avail_empty(&net->dev, vq))
674-
vhost_poll_queue(&vq->poll);
675-
else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
676-
vhost_disable_notify(&net->dev, vq);
677-
vhost_poll_queue(&vq->poll);
690+
if (!vhost_vq_avail_empty(&net->dev, tvq)) {
691+
vhost_poll_queue(&tvq->poll);
692+
} else if (unlikely(vhost_enable_notify(&net->dev, tvq))) {
693+
vhost_disable_notify(&net->dev, tvq);
694+
vhost_poll_queue(&tvq->poll);
678695
}
679696

680-
mutex_unlock(&vq->mutex);
697+
mutex_unlock(&tvq->mutex);
681698

682-
len = peek_head_len(rvq, sk);
699+
len = peek_head_len(rnvq, sk);
683700
}
684701

685702
return len;
@@ -786,6 +803,7 @@ static void handle_rx(struct vhost_net *net)
786803
s16 headcount;
787804
size_t vhost_hlen, sock_hlen;
788805
size_t vhost_len, sock_len;
806+
bool busyloop_intr = false;
789807
struct socket *sock;
790808
struct iov_iter fixup;
791809
__virtio16 num_buffers;
@@ -809,7 +827,8 @@ static void handle_rx(struct vhost_net *net)
809827
vq->log : NULL;
810828
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
811829

812-
while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
830+
while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
831+
&busyloop_intr))) {
813832
sock_len += sock_hlen;
814833
vhost_len = sock_len + vhost_hlen;
815834
headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
@@ -820,7 +839,9 @@ static void handle_rx(struct vhost_net *net)
820839
goto out;
821840
/* OK, now we need to know about added descriptors. */
822841
if (!headcount) {
823-
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
842+
if (unlikely(busyloop_intr)) {
843+
vhost_poll_queue(&vq->poll);
844+
} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
824845
/* They have slipped one in as we were
825846
* doing that: check again. */
826847
vhost_disable_notify(&net->dev, vq);
@@ -830,6 +851,7 @@ static void handle_rx(struct vhost_net *net)
830851
* they refilled. */
831852
goto out;
832853
}
854+
busyloop_intr = false;
833855
if (nvq->rx_ring)
834856
msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
835857
/* On overrun, truncate and discard */
@@ -896,7 +918,10 @@ static void handle_rx(struct vhost_net *net)
896918
goto out;
897919
}
898920
}
899-
vhost_net_enable_vq(net, vq);
921+
if (unlikely(busyloop_intr))
922+
vhost_poll_queue(&vq->poll);
923+
else
924+
vhost_net_enable_vq(net, vq);
900925
out:
901926
vhost_rx_signal_used(nvq);
902927
mutex_unlock(&vq->mutex);

0 commit comments

Comments
 (0)