Skip to content

Commit db570d7

Browse files
Artemy-Mellanox authored and dledford committed
IB/mlx5: Add ODP support to MW
Internally, an MW is implemented as a KLM MKey and is filled by userspace UMR post-sends. Handle page faults triggered by operations on these MKeys. Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Doug Ledford <dledford@redhat.com>
1 parent 1b7dbc2 commit db570d7

File tree

3 files changed

+120
-43
lines changed

3 files changed

+120
-43
lines changed

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,7 @@ struct mlx5_ib_mr {
513513
struct mlx5_ib_mw { /* mlx5 driver state for one memory window (MW) */
514514
struct ib_mw ibmw; /* embedded ib_mw; mlx5_ib_alloc_mw() sets ibmw.rkey from mmkey.key */
515515
struct mlx5_core_mkey mmkey; /* core mkey, typed MLX5_MKEY_MW; the page-fault path gets back to the MW via container_of() */
516+
int ndescs; /* descriptor count saved by mlx5_ib_alloc_mw(); sizes the query_mkey output and bounds the KLM walk on page fault */
516517
};
517518

518519
struct mlx5_ib_umr_context {

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
16881688

16891689
mw->mmkey.type = MLX5_MKEY_MW;
16901690
mw->ibmw.rkey = mw->mmkey.key;
1691+
mw->ndescs = ndescs;
16911692

16921693
resp.response_length = min(offsetof(typeof(resp), response_length) +
16931694
sizeof(resp.response_length), udata->outlen);

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 118 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -288,24 +288,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
288288
return;
289289
}
290290

291-
static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
292-
u32 key)
293-
{
294-
u32 base_key = mlx5_base_mkey(key);
295-
struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
296-
struct mlx5_ib_mr *mr;
297-
298-
if (!mmkey || mmkey->key != key || mmkey->type != MLX5_MKEY_MR)
299-
return NULL;
300-
301-
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
302-
303-
if (!mr->live)
304-
return NULL;
305-
306-
return container_of(mmkey, struct mlx5_ib_mr, mmkey);
307-
}
308-
309291
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
310292
struct mlx5_pagefault *pfault,
311293
int error)
@@ -625,6 +607,14 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
625607
return ret;
626608
}
627609

610+
struct pf_frame { /* one pending (key, range) lookup queued while expanding a memory window's KLM entries */
611+
struct pf_frame *next; /* singly linked list link; frames are pushed and popped at the list head */
612+
u32 key; /* mkey to look up for this range (taken from the KLM entry's key) */
613+
u64 io_virt; /* start address of the range: the KLM entry's va plus the offset into it */
614+
size_t bcnt; /* byte count of the range, clamped to what remains of the KLM entry */
615+
int depth; /* indirection depth of this frame (parent depth + 1); checked against the max_indirection capability */
616+
};
617+
628618
/*
629619
* Handle a single data segment in a page-fault WQE or RDMA region.
630620
*
@@ -641,43 +631,128 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
641631
u32 *bytes_committed,
642632
u32 *bytes_mapped)
643633
{
644-
int npages = 0, srcu_key, ret;
634+
int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
635+
struct pf_frame *head = NULL, *frame;
636+
struct mlx5_core_mkey *mmkey;
637+
struct mlx5_ib_mw *mw;
645638
struct mlx5_ib_mr *mr;
646-
size_t size;
639+
struct mlx5_klm *pklm;
640+
u32 *out = NULL;
641+
size_t offset;
647642

648643
srcu_key = srcu_read_lock(&dev->mr_srcu);
649-
mr = mlx5_ib_odp_find_mr_lkey(dev, key);
650-
/*
651-
* If we didn't find the MR, it means the MR was closed while we were
652-
* handling the ODP event. In this case we return -EFAULT so that the
653-
* QP will be closed.
654-
*/
655-
if (!mr || !mr->ibmr.pd) {
656-
mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
657-
key);
644+
645+
io_virt += *bytes_committed;
646+
bcnt -= *bytes_committed;
647+
648+
next_mr:
649+
mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
650+
if (!mmkey || mmkey->key != key) {
651+
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
658652
ret = -EFAULT;
659653
goto srcu_unlock;
660654
}
661-
if (!mr->umem->odp_data) {
662-
mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
663-
key);
664-
if (bytes_mapped)
665-
*bytes_mapped +=
666-
(bcnt - *bytes_committed);
655+
656+
switch (mmkey->type) {
657+
case MLX5_MKEY_MR:
658+
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
659+
if (!mr->live || !mr->ibmr.pd) {
660+
mlx5_ib_dbg(dev, "got dead MR\n");
661+
ret = -EFAULT;
662+
goto srcu_unlock;
663+
}
664+
665+
ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
666+
if (ret < 0)
667+
goto srcu_unlock;
668+
669+
npages += ret;
670+
ret = 0;
671+
break;
672+
673+
case MLX5_MKEY_MW:
674+
mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
675+
676+
if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
677+
mlx5_ib_dbg(dev, "indirection level exceeded\n");
678+
ret = -EFAULT;
679+
goto srcu_unlock;
680+
}
681+
682+
outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
683+
sizeof(*pklm) * (mw->ndescs - 2);
684+
685+
if (outlen > cur_outlen) {
686+
kfree(out);
687+
out = kzalloc(outlen, GFP_KERNEL);
688+
if (!out) {
689+
ret = -ENOMEM;
690+
goto srcu_unlock;
691+
}
692+
cur_outlen = outlen;
693+
}
694+
695+
pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
696+
bsf0_klm0_pas_mtt0_1);
697+
698+
ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
699+
if (ret)
700+
goto srcu_unlock;
701+
702+
offset = io_virt - MLX5_GET64(query_mkey_out, out,
703+
memory_key_mkey_entry.start_addr);
704+
705+
for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
706+
if (offset >= be32_to_cpu(pklm->bcount)) {
707+
offset -= be32_to_cpu(pklm->bcount);
708+
continue;
709+
}
710+
711+
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
712+
if (!frame) {
713+
ret = -ENOMEM;
714+
goto srcu_unlock;
715+
}
716+
717+
frame->key = be32_to_cpu(pklm->key);
718+
frame->io_virt = be64_to_cpu(pklm->va) + offset;
719+
frame->bcnt = min_t(size_t, bcnt,
720+
be32_to_cpu(pklm->bcount) - offset);
721+
frame->depth = depth + 1;
722+
frame->next = head;
723+
head = frame;
724+
725+
bcnt -= frame->bcnt;
726+
}
727+
break;
728+
729+
default:
730+
mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
731+
ret = -EFAULT;
667732
goto srcu_unlock;
668733
}
669734

670-
/*
671-
* Avoid branches - this code will perform correctly
672-
* in all iterations (in iteration 2 and above,
673-
* bytes_committed == 0).
674-
*/
675-
io_virt += *bytes_committed;
676-
bcnt -= *bytes_committed;
735+
if (head) {
736+
frame = head;
737+
head = frame->next;
677738

678-
npages = pagefault_mr(dev, mr, io_virt, size, bytes_mapped);
739+
key = frame->key;
740+
io_virt = frame->io_virt;
741+
bcnt = frame->bcnt;
742+
depth = frame->depth;
743+
kfree(frame);
744+
745+
goto next_mr;
746+
}
679747

680748
srcu_unlock:
749+
while (head) {
750+
frame = head;
751+
head = frame->next;
752+
kfree(frame);
753+
}
754+
kfree(out);
755+
681756
srcu_read_unlock(&dev->mr_srcu, srcu_key);
682757
*bytes_committed = 0;
683758
return ret ? ret : npages;

0 commit comments

Comments
 (0)