
Commit eab668a

haggaie authored and rolandd committed
IB/mlx5: Add support for RDMA read/write responder page faults
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
1 parent 7bdf65d commit eab668a

File tree

  • drivers/infiniband/hw/mlx5/odp.c

1 file changed: +79 −0 lines changed


drivers/infiniband/hw/mlx5/odp.c

Lines changed: 79 additions & 0 deletions
@@ -35,6 +35,8 @@
 
 #include "mlx5_ib.h"
 
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
 struct workqueue_struct *mlx5_ib_page_fault_wq;
 
 #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \
@@ -490,6 +492,80 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
 	free_page((unsigned long)buffer);
 }
 
+static int pages_in_range(u64 address, u32 length)
+{
+	return (ALIGN(address + length, PAGE_SIZE) -
+		(address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+					   struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_pagefault *mpfault = &pfault->mpfault;
+	u64 address;
+	u32 length;
+	u32 prefetch_len = mpfault->bytes_committed;
+	int prefetch_activated = 0;
+	u32 rkey = mpfault->rdma.r_key;
+	int ret;
+
+	/* The RDMA responder handler handles the page fault in two parts.
+	 * First it brings the necessary pages for the current packet
+	 * (and uses the pfault context), and then (after resuming the QP)
+	 * prefetches more pages. The second operation cannot use the pfault
+	 * context and therefore uses the dummy_pfault context allocated on
+	 * the stack */
+	struct mlx5_ib_pfault dummy_pfault = {};
+
+	dummy_pfault.mpfault.bytes_committed = 0;
+
+	mpfault->rdma.rdma_va += mpfault->bytes_committed;
+	mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+					 mpfault->rdma.rdma_op_len);
+	mpfault->bytes_committed = 0;
+
+	address = mpfault->rdma.rdma_va;
+	length = mpfault->rdma.rdma_op_len;
+
+	/* For some operations, the hardware cannot tell the exact message
+	 * length, and in those cases it reports zero. Use prefetch
+	 * logic. */
+	if (length == 0) {
+		prefetch_activated = 1;
+		length = mpfault->rdma.packet_size;
+		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+	}
+
+	ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+					    NULL);
+	if (ret == -EAGAIN) {
+		/* We're racing with an invalidation, don't prefetch */
+		prefetch_activated = 0;
+	} else if (ret < 0 || pages_in_range(address, length) > ret) {
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		return;
+	}
+
+	mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+	/* At this point, there might be a new pagefault already arriving in
+	 * the eq, switch to the dummy pagefault for the rest of the
+	 * processing. We're still OK with the objects being alive as the
+	 * work-queue is being fenced. */
+	if (prefetch_activated) {
+		ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+						    address,
+						    prefetch_len,
+						    NULL);
+		if (ret < 0) {
+			pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+				ret, prefetch_activated,
+				qp->ibqp.qp_num, address, prefetch_len);
+		}
+	}
+}
+
 void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 			       struct mlx5_ib_pfault *pfault)
 {
@@ -499,6 +575,9 @@ void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 	case MLX5_PFAULT_SUBTYPE_WQE:
 		mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
 		break;
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+		break;
 	default:
 		pr_warn("Invalid page fault event subtype: 0x%x\n",
 			event_subtype);
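
A note on the handler's first step: before faulting anything in, it trims off the bytes the hardware has already committed, advancing the faulting VA and shrinking the remaining operation length. The standalone C sketch below replays that adjustment with made-up values; the struct is a simplified stand-in for the kernel's mlx5_pagefault, not the real definition.

/* Userspace sketch of the committed-bytes trim at the top of
 * mlx5_ib_mr_rdma_pfault_handler(). The struct and values here are
 * illustrative stand-ins, not the kernel's mlx5_pagefault. */
#include <stdio.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

struct rdma_fault {
	uint64_t rdma_va;         /* faulting virtual address */
	uint32_t rdma_op_len;     /* remaining RDMA operation length */
	uint32_t bytes_committed; /* bytes the HW already resolved */
};

int main(void)
{
	struct rdma_fault f = {
		.rdma_va = 0x10000,
		.rdma_op_len = 0x3000,
		.bytes_committed = 0x1000,
	};

	/* Skip past the part of the operation that already has pages:
	 * advance the VA, shrink the remaining length (clamped so it
	 * cannot underflow), and reset the committed counter. */
	f.rdma_va += f.bytes_committed;
	f.rdma_op_len -= MIN(f.bytes_committed, f.rdma_op_len);
	f.bytes_committed = 0;

	/* Prints: va=0x11000 len=0x2000 */
	printf("va=0x%llx len=0x%x\n",
	       (unsigned long long)f.rdma_va, f.rdma_op_len);
	return 0;
}

The min() clamp mirrors the patch: it keeps rdma_op_len from wrapping if the committed byte count ever exceeds the reported operation length.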
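
The partial-resolution check (pages_in_range(address, length) > ret) relies on pages_in_range() counting how many pages the byte range [address, address + length) touches. A minimal userspace sketch of the same calculation, assuming 4 KiB pages and redefining ALIGN, PAGE_SIZE, and PAGE_MASK locally since the kernel headers are not available:

/* Userspace sketch of pages_in_range() from odp.c, assuming 4 KiB
 * pages. The macros are local redefinitions for illustration only. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)             /* 4096 */
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

static int pages_in_range(uint64_t address, uint32_t length)
{
	/* Round the end of the range up and its start down to page
	 * boundaries, then count the pages in between. */
	return (ALIGN(address + length, PAGE_SIZE) -
		(address & PAGE_MASK)) >> PAGE_SHIFT;
}

int main(void)
{
	/* 8 bytes starting 4 bytes before a page boundary straddle
	 * two pages, even though the transfer is tiny. */
	printf("%d\n", pages_in_range(0x1ffc, 8));    /* prints 2 */

	/* A page-aligned 8 KiB transfer also covers two pages. */
	printf("%d\n", pages_in_range(0x2000, 8192)); /* prints 2 */
	return 0;
}

Per the patch, if pagefault_single_data_segment() resolves fewer pages than this count, the fault was only partially handled and the QP is resumed with the error flag set.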
