@@ -288,24 +288,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 	return;
 }
 
-static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
-						   u32 key)
-{
-	u32 base_key = mlx5_base_mkey(key);
-	struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
-	struct mlx5_ib_mr *mr;
-
-	if (!mmkey || mmkey->key != key || mmkey->type != MLX5_MKEY_MR)
-		return NULL;
-
-	mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
-
-	if (!mr->live)
-		return NULL;
-
-	return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
 static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 				      struct mlx5_pagefault *pfault,
 				      int error)
@@ -625,6 +607,14 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	return ret;
 }
 
+struct pf_frame {
+	struct pf_frame *next;
+	u32 key;
+	u64 io_virt;
+	size_t bcnt;
+	int depth;
+};
+
 /*
  * Handle a single data segment in a page-fault WQE or RDMA region.
  *
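The hunk above adds pf_frame, a singly linked list node that the rewritten handler below uses as an explicit LIFO work stack: each KLM entry of an indirect mkey that still overlaps the faulting range is pushed as a frame, and the handler pops one frame at a time and jumps back to next_mr instead of recursing. The following is a minimal user-space sketch of that push/pop pattern; the frame, push_frame, and pop_frame names are illustrative and not part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified model of a work-stack frame (illustrative only). */
struct frame {
	struct frame *next;
	uint32_t key;
	uint64_t io_virt;
	size_t bcnt;
	int depth;
};

/* Push a pending (key, range) onto the head of the list. */
static int push_frame(struct frame **head, uint32_t key, uint64_t io_virt,
		      size_t bcnt, int depth)
{
	struct frame *f = calloc(1, sizeof(*f));

	if (!f)
		return -1;
	f->key = key;
	f->io_virt = io_virt;
	f->bcnt = bcnt;
	f->depth = depth;
	f->next = *head;
	*head = f;
	return 0;
}

/* Pop the most recently pushed frame; returns NULL when the stack is empty. */
static struct frame *pop_frame(struct frame **head)
{
	struct frame *f = *head;

	if (f)
		*head = f->next;
	return f;
}

int main(void)
{
	struct frame *head = NULL, *f;

	/* Two child ranges discovered while parsing an indirect key. */
	push_frame(&head, 0x100, 0x1000, 4096, 1);
	push_frame(&head, 0x200, 0x3000, 8192, 1);

	/* Drain in LIFO order, as the fault handler's next_mr loop does. */
	while ((f = pop_frame(&head))) {
		printf("resolve key 0x%x at 0x%llx, %zu bytes, depth %d\n",
		       f->key, (unsigned long long)f->io_virt, f->bcnt,
		       f->depth);
		free(f);
	}
	return 0;
}

Keeping the pending work in a heap-allocated list bounds kernel stack usage no matter how deeply memory windows are nested; the patch additionally caps the nesting depth against the device's max_indirection capability.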
@@ -641,43 +631,128 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 					 u32 *bytes_committed,
 					 u32 *bytes_mapped)
 {
-	int npages = 0, srcu_key, ret;
+	int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+	struct pf_frame *head = NULL, *frame;
+	struct mlx5_core_mkey *mmkey;
+	struct mlx5_ib_mw *mw;
 	struct mlx5_ib_mr *mr;
-	size_t size;
+	struct mlx5_klm *pklm;
+	u32 *out = NULL;
+	size_t offset;
 
 	srcu_key = srcu_read_lock(&dev->mr_srcu);
-	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
-	/*
-	 * If we didn't find the MR, it means the MR was closed while we were
-	 * handling the ODP event. In this case we return -EFAULT so that the
-	 * QP will be closed.
-	 */
-	if (!mr || !mr->ibmr.pd) {
-		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
-			    key);
+
+	io_virt += *bytes_committed;
+	bcnt -= *bytes_committed;
+
+next_mr:
+	mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+	if (!mmkey || mmkey->key != key) {
+		mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
 		ret = -EFAULT;
 		goto srcu_unlock;
 	}
-	if (!mr->umem->odp_data) {
-		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-			    key);
-		if (bytes_mapped)
-			*bytes_mapped +=
-				(bcnt - *bytes_committed);
+
+	switch (mmkey->type) {
+	case MLX5_MKEY_MR:
+		mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+		if (!mr->live || !mr->ibmr.pd) {
+			mlx5_ib_dbg(dev, "got dead MR\n");
+			ret = -EFAULT;
+			goto srcu_unlock;
+		}
+
+		ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+		if (ret < 0)
+			goto srcu_unlock;
+
+		npages += ret;
+		ret = 0;
+		break;
+
+	case MLX5_MKEY_MW:
+		mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+
+		if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
+			mlx5_ib_dbg(dev, "indirection level exceeded\n");
+			ret = -EFAULT;
+			goto srcu_unlock;
+		}
+
+		outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
+			sizeof(*pklm) * (mw->ndescs - 2);
+
+		if (outlen > cur_outlen) {
+			kfree(out);
+			out = kzalloc(outlen, GFP_KERNEL);
+			if (!out) {
+				ret = -ENOMEM;
+				goto srcu_unlock;
+			}
+			cur_outlen = outlen;
+		}
+
+		pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
+						       bsf0_klm0_pas_mtt0_1);
+
+		ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+		if (ret)
+			goto srcu_unlock;
+
+		offset = io_virt - MLX5_GET64(query_mkey_out, out,
+					      memory_key_mkey_entry.start_addr);
+
+		for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+			if (offset >= be32_to_cpu(pklm->bcount)) {
+				offset -= be32_to_cpu(pklm->bcount);
+				continue;
+			}
+
+			frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+			if (!frame) {
+				ret = -ENOMEM;
+				goto srcu_unlock;
+			}
+
+			frame->key = be32_to_cpu(pklm->key);
+			frame->io_virt = be64_to_cpu(pklm->va) + offset;
+			frame->bcnt = min_t(size_t, bcnt,
+					    be32_to_cpu(pklm->bcount) - offset);
+			frame->depth = depth + 1;
+			frame->next = head;
+			head = frame;
+
+			bcnt -= frame->bcnt;
+		}
+		break;
+
+	default:
+		mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
+		ret = -EFAULT;
 		goto srcu_unlock;
 	}
 
-	/*
-	 * Avoid branches - this code will perform correctly
-	 * in all iterations (in iteration 2 and above,
-	 * bytes_committed == 0).
-	 */
-	io_virt += *bytes_committed;
-	bcnt -= *bytes_committed;
+	if (head) {
+		frame = head;
+		head = frame->next;
 
-	npages = pagefault_mr(dev, mr, io_virt, size, bytes_mapped);
+		key = frame->key;
+		io_virt = frame->io_virt;
+		bcnt = frame->bcnt;
+		depth = frame->depth;
+		kfree(frame);
+
+		goto next_mr;
+	}
 
 srcu_unlock:
+	while (head) {
+		frame = head;
+		head = frame->next;
+		kfree(frame);
+	}
+	kfree(out);
+
 	srcu_read_unlock(&dev->mr_srcu, srcu_key);
 	*bytes_committed = 0;
 	return ret ? ret : npages;
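In the MLX5_MKEY_MW case above, the loop over the memory window's KLM list skips descriptors that lie entirely below the faulting offset and clips each overlapping descriptor to the remaining byte count before pushing it as a frame. Below is a simplified, self-contained sketch of that range-splitting idea; struct klm_entry and handle_chunk() are hypothetical stand-ins, and the byte-order conversion and frame bookkeeping of the real patch are omitted.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical, already byte-swapped view of one KLM descriptor. */
struct klm_entry {
	uint32_t key;
	uint64_t va;
	uint32_t bcount;
};

/*
 * Split a request of 'bcnt' bytes, starting 'offset' bytes into the region
 * described by klm[0..ndescs), into one chunk per overlapping descriptor.
 */
static void split_range(const struct klm_entry *klm, int ndescs,
			uint64_t offset, size_t bcnt,
			void (*handle_chunk)(uint32_t key, uint64_t va,
					     size_t len))
{
	int i;

	for (i = 0; bcnt && i < ndescs; i++, klm++) {
		size_t len;

		/* Descriptors wholly below the faulting offset are skipped. */
		if (offset >= klm->bcount) {
			offset -= klm->bcount;
			continue;
		}

		/* Clip to what remains of this descriptor and of the request. */
		len = klm->bcount - offset;
		if (len > bcnt)
			len = bcnt;

		handle_chunk(klm->key, klm->va + offset, len);

		bcnt -= len;
		offset = 0;	/* only the first overlapping entry is partial */
	}
}

static void print_chunk(uint32_t key, uint64_t va, size_t len)
{
	printf("key 0x%x: va 0x%llx, %zu bytes\n",
	       key, (unsigned long long)va, len);
}

int main(void)
{
	const struct klm_entry klm[] = {
		{ .key = 0x10, .va = 0x10000, .bcount = 4096 },
		{ .key = 0x11, .va = 0x20000, .bcount = 4096 },
		{ .key = 0x12, .va = 0x30000, .bcount = 4096 },
	};

	/* Fault 6000 bytes starting 1000 bytes into the window. */
	split_range(klm, 3, 1000, 6000, print_chunk);
	return 0;
}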