 #include "mlx5_ib.h"

+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
 struct workqueue_struct *mlx5_ib_page_fault_wq;

 #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do {        \
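The new MAX_PREFETCH_LEN constant caps how much data the RDMA responder path added later in this patch will speculatively prefetch once the faulting packet itself has been resolved. As a reading aid only (nothing below is driver code; min() is open-coded), the clamp it is used for behaves like this:

#include <stdio.h>

#define MAX_PREFETCH_LEN (4*1024*1024U)         /* 4 MiB, as in the patch */

int main(void)
{
        /* Example value standing in for mpfault->bytes_committed. */
        unsigned int bytes_committed = 16U * 1024 * 1024;
        unsigned int prefetch_len = bytes_committed < MAX_PREFETCH_LEN ?
                                    bytes_committed : MAX_PREFETCH_LEN;

        printf("prefetch_len = %u\n", prefetch_len);    /* prints 4194304 */
        return 0;
}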
@@ -490,6 +492,80 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
 	free_page((unsigned long)buffer);
 }

+static int pages_in_range(u64 address, u32 length)
+{
+	return (ALIGN(address + length, PAGE_SIZE) -
+		(address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+					   struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_pagefault *mpfault = &pfault->mpfault;
+	u64 address;
+	u32 length;
+	u32 prefetch_len = mpfault->bytes_committed;
+	int prefetch_activated = 0;
+	u32 rkey = mpfault->rdma.r_key;
+	int ret;
+
+	/* The RDMA responder handler handles the page fault in two parts.
+	 * First it brings the necessary pages for the current packet
+	 * (and uses the pfault context), and then (after resuming the QP)
+	 * prefetches more pages. The second operation cannot use the pfault
+	 * context and therefore uses the dummy_pfault context allocated on
+	 * the stack */
+	struct mlx5_ib_pfault dummy_pfault = {};
+
+	dummy_pfault.mpfault.bytes_committed = 0;
+
+	mpfault->rdma.rdma_va += mpfault->bytes_committed;
+	mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+					 mpfault->rdma.rdma_op_len);
+	mpfault->bytes_committed = 0;
+
+	address = mpfault->rdma.rdma_va;
+	length  = mpfault->rdma.rdma_op_len;
+
+	/* For some operations, the hardware cannot tell the exact message
+	 * length, and in those cases it reports zero. Use prefetch
+	 * logic. */
+	if (length == 0) {
+		prefetch_activated = 1;
+		length = mpfault->rdma.packet_size;
+		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+	}
+
+	ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+					    NULL);
+	if (ret == -EAGAIN) {
+		/* We're racing with an invalidation, don't prefetch */
+		prefetch_activated = 0;
+	} else if (ret < 0 || pages_in_range(address, length) > ret) {
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		return;
+	}
+
+	mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+	/* At this point, there might be a new pagefault already arriving in
+	 * the eq, switch to the dummy pagefault for the rest of the
+	 * processing. We're still OK with the objects being alive as the
+	 * work-queue is being fenced. */
+
+	if (prefetch_activated) {
+		ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+						    address,
+						    prefetch_len,
+						    NULL);
+		if (ret < 0) {
+			pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+				ret, prefetch_activated,
+				qp->ibqp.qp_num, address, prefetch_len);
+		}
+	}
+}
+
 void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 			       struct mlx5_ib_pfault *pfault)
 {
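The pages_in_range() helper added above is self-contained enough to check in isolation. Below is a user-space re-statement of the same arithmetic, with PAGE_SHIFT/PAGE_SIZE/PAGE_MASK and ALIGN redefined locally for a 4 KiB page size purely for illustration; it counts every page touched by [address, address + length):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Same arithmetic as the patch: round the end of the range up to a page
 * boundary, round the start down, and convert the byte span to pages. */
static int pages_in_range(uint64_t address, uint32_t length)
{
	return (ALIGN(address + length, PAGE_SIZE) -
		(address & PAGE_MASK)) >> PAGE_SHIFT;
}

int main(void)
{
	printf("%d\n", pages_in_range(0x1ffb, 10));	/* straddles a boundary: 2 */
	printf("%d\n", pages_in_range(0x2000, 4096));	/* exactly one page: 1 */
	return 0;
}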
@@ -499,6 +575,9 @@ void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 	case MLX5_PFAULT_SUBTYPE_WQE:
 		mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
 		break;
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+		break;
 	default:
 		pr_warn("Invalid page fault event subtype: 0x%x\n",
 			event_subtype);
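The long comment in the new handler describes a general ordering: resolve the pages the faulting packet needs using the real pfault context, resume the hardware, and only then do best-effort prefetching with a throwaway context, since a new fault may already be reusing the real one. A minimal, self-contained sketch of that ordering, with hypothetical names standing in for the driver calls (resolve_pages() for pagefault_single_data_segment(), resume_hw() for mlx5_ib_page_fault_resume()):

#include <stdio.h>

/* Hypothetical stand-in for struct mlx5_ib_pfault. */
struct fault_ctx {
	unsigned int bytes_committed;
};

/* Pretend every request resolves successfully. */
static int resolve_pages(struct fault_ctx *ctx, const char *what)
{
	printf("resolving %s (bytes_committed=%u)\n", what, ctx->bytes_committed);
	return 0;
}

static void resume_hw(int error)
{
	printf("resume hardware, error=%d\n", error);
}

static void handle_fault(struct fault_ctx *real)
{
	struct fault_ctx dummy = { 0 };	/* throwaway context for phase 2 */

	/* Phase 1: mandatory pages for the faulting packet, real context. */
	if (resolve_pages(real, "faulting packet") < 0) {
		resume_hw(1);
		return;
	}
	resume_hw(0);

	/* Phase 2: best-effort prefetch; the real context may already be
	 * carrying a newly arrived fault, so use the throwaway one. */
	if (resolve_pages(&dummy, "prefetch window") < 0)
		printf("prefetch failed, ignored\n");
}

int main(void)
{
	struct fault_ctx f = { .bytes_committed = 64 };

	handle_fault(&f);
	return 0;
}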