Skip to content

Commit 6b1e6cc

Browse files
jasowang authored and
mstsirkin committed
vhost: new device IOTLB API
This patch tries to implement a device IOTLB for vhost. This could be used with a userspace (qemu) implementation of DMA remapping to emulate an IOMMU for the guest. The idea is simple: cache the translation in a software device IOTLB (which is implemented as an interval tree) in vhost and use the vhost_net file descriptor for reporting IOTLB misses and IOTLB updates/invalidations. When vhost meets an IOTLB miss, the fault address, size and access can be read from the file. After userspace finishes the translation, it writes the translated address to the vhost_net file to update the device IOTLB. When device IOTLB is enabled by setting VIRTIO_F_IOMMU_PLATFORM, all vq addresses set by ioctl are treated as IOVAs instead of virtual addresses, and access can only be done through the IOTLB instead of direct userspace memory access. Before each round of vq processing, all vq metadata is prefetched into the device IOTLB to make sure no translation fault happens during vq processing. In most cases, virtqueues are contiguous even in virtual address space. The IOTLB translation for the virtqueue itself may make it a little slower. We might add a fast-path cache on top of this patch. Signed-off-by: Jason Wang <jasowang@redhat.com> [mst: use virtio feature bit: VHOST_F_DEVICE_IOTLB -> VIRTIO_F_IOMMU_PLATFORM ] [mst: fix build warnings ] Signed-off-by: Michael S. Tsirkin <mst@redhat.com> [ weiyj.lk: missing unlock on error ] Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com>
1 parent b2fbd8b commit 6b1e6cc

File tree

4 files changed

+705
-50
lines changed

4 files changed

+705
-50
lines changed

drivers/vhost/net.c

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
6161
enum {
6262
VHOST_NET_FEATURES = VHOST_FEATURES |
6363
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
64-
(1ULL << VIRTIO_NET_F_MRG_RXBUF)
64+
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
65+
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
6566
};
6667

6768
enum {
@@ -308,7 +309,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
308309
{
309310
unsigned long uninitialized_var(endtime);
310311
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
311-
out_num, in_num, NULL, NULL);
312+
out_num, in_num, NULL, NULL);
312313

313314
if (r == vq->num && vq->busyloop_timeout) {
314315
preempt_disable();
@@ -318,7 +319,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
318319
cpu_relax_lowlatency();
319320
preempt_enable();
320321
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
321-
out_num, in_num, NULL, NULL);
322+
out_num, in_num, NULL, NULL);
322323
}
323324

324325
return r;
@@ -351,6 +352,9 @@ static void handle_tx(struct vhost_net *net)
351352
if (!sock)
352353
goto out;
353354

355+
if (!vq_iotlb_prefetch(vq))
356+
goto out;
357+
354358
vhost_disable_notify(&net->dev, vq);
355359

356360
hdr_size = nvq->vhost_hlen;
@@ -612,6 +616,10 @@ static void handle_rx(struct vhost_net *net)
612616
sock = vq->private_data;
613617
if (!sock)
614618
goto out;
619+
620+
if (!vq_iotlb_prefetch(vq))
621+
goto out;
622+
615623
vhost_disable_notify(&net->dev, vq);
616624

617625
vhost_hlen = nvq->vhost_hlen;
@@ -1080,10 +1088,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
10801088
}
10811089
mutex_lock(&n->dev.mutex);
10821090
if ((features & (1 << VHOST_F_LOG_ALL)) &&
1083-
!vhost_log_access_ok(&n->dev)) {
1084-
mutex_unlock(&n->dev.mutex);
1085-
return -EFAULT;
1091+
!vhost_log_access_ok(&n->dev))
1092+
goto out_unlock;
1093+
1094+
if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
1095+
if (vhost_init_device_iotlb(&n->dev, true))
1096+
goto out_unlock;
10861097
}
1098+
10871099
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
10881100
mutex_lock(&n->vqs[i].vq.mutex);
10891101
n->vqs[i].vq.acked_features = features;
@@ -1093,6 +1105,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
10931105
}
10941106
mutex_unlock(&n->dev.mutex);
10951107
return 0;
1108+
1109+
out_unlock:
1110+
mutex_unlock(&n->dev.mutex);
1111+
return -EFAULT;
10961112
}
10971113

10981114
static long vhost_net_set_owner(struct vhost_net *n)
@@ -1166,9 +1182,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
11661182
}
11671183
#endif
11681184

1185+
static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
1186+
{
1187+
struct file *file = iocb->ki_filp;
1188+
struct vhost_net *n = file->private_data;
1189+
struct vhost_dev *dev = &n->dev;
1190+
int noblock = file->f_flags & O_NONBLOCK;
1191+
1192+
return vhost_chr_read_iter(dev, to, noblock);
1193+
}
1194+
1195+
static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb,
1196+
struct iov_iter *from)
1197+
{
1198+
struct file *file = iocb->ki_filp;
1199+
struct vhost_net *n = file->private_data;
1200+
struct vhost_dev *dev = &n->dev;
1201+
1202+
return vhost_chr_write_iter(dev, from);
1203+
}
1204+
1205+
static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
1206+
{
1207+
struct vhost_net *n = file->private_data;
1208+
struct vhost_dev *dev = &n->dev;
1209+
1210+
return vhost_chr_poll(file, dev, wait);
1211+
}
1212+
11691213
static const struct file_operations vhost_net_fops = {
11701214
.owner = THIS_MODULE,
11711215
.release = vhost_net_release,
1216+
.read_iter = vhost_net_chr_read_iter,
1217+
.write_iter = vhost_net_chr_write_iter,
1218+
.poll = vhost_net_chr_poll,
11721219
.unlocked_ioctl = vhost_net_ioctl,
11731220
#ifdef CONFIG_COMPAT
11741221
.compat_ioctl = vhost_net_compat_ioctl,

0 commit comments

Comments
 (0)