Skip to content

Commit 403cd12

Browse files
Artemy-Mellanox authored and dledford committed
IB/umem: Add contiguous ODP support
Currently ODP supports only regular MMU pages. Add ODP support for regions consisting of physically contiguous chunks of arbitrary order (huge pages, for instance) to improve performance. Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Doug Ledford <dledford@redhat.com>
1 parent 4df4a5b commit 403cd12

File tree

2 files changed

+33
-21
lines changed

2 files changed

+33
-21
lines changed

drivers/infiniband/core/umem_odp.c

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,6 @@ void ib_umem_odp_release(struct ib_umem *umem)
504504
static int ib_umem_odp_map_dma_single_page(
505505
struct ib_umem *umem,
506506
int page_index,
507-
u64 base_virt_addr,
508507
struct page *page,
509508
u64 access_mask,
510509
unsigned long current_seq)
@@ -527,7 +526,7 @@ static int ib_umem_odp_map_dma_single_page(
527526
if (!(umem->odp_data->dma_list[page_index])) {
528527
dma_addr = ib_dma_map_page(dev,
529528
page,
530-
0, PAGE_SIZE,
529+
0, BIT(umem->page_shift),
531530
DMA_BIDIRECTIONAL);
532531
if (ib_dma_mapping_error(dev, dma_addr)) {
533532
ret = -EFAULT;
@@ -555,8 +554,9 @@ static int ib_umem_odp_map_dma_single_page(
555554
if (remove_existing_mapping && umem->context->invalidate_range) {
556555
invalidate_page_trampoline(
557556
umem,
558-
base_virt_addr + (page_index * PAGE_SIZE),
559-
base_virt_addr + ((page_index+1)*PAGE_SIZE),
557+
ib_umem_start(umem) + (page_index >> umem->page_shift),
558+
ib_umem_start(umem) + ((page_index + 1) >>
559+
umem->page_shift),
560560
NULL);
561561
ret = -EAGAIN;
562562
}
@@ -595,10 +595,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
595595
struct task_struct *owning_process = NULL;
596596
struct mm_struct *owning_mm = NULL;
597597
struct page **local_page_list = NULL;
598-
u64 off;
599-
int j, k, ret = 0, start_idx, npages = 0;
600-
u64 base_virt_addr;
598+
u64 page_mask, off;
599+
int j, k, ret = 0, start_idx, npages = 0, page_shift;
601600
unsigned int flags = 0;
601+
phys_addr_t p = 0;
602602

603603
if (access_mask == 0)
604604
return -EINVAL;
@@ -611,9 +611,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
611611
if (!local_page_list)
612612
return -ENOMEM;
613613

614-
off = user_virt & (~PAGE_MASK);
615-
user_virt = user_virt & PAGE_MASK;
616-
base_virt_addr = user_virt;
614+
page_shift = umem->page_shift;
615+
page_mask = ~(BIT(page_shift) - 1);
616+
off = user_virt & (~page_mask);
617+
user_virt = user_virt & page_mask;
617618
bcnt += off; /* Charge for the first page offset as well. */
618619

619620
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
@@ -631,13 +632,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
631632
if (access_mask & ODP_WRITE_ALLOWED_BIT)
632633
flags |= FOLL_WRITE;
633634

634-
start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
635+
start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
635636
k = start_idx;
636637

637638
while (bcnt > 0) {
638-
const size_t gup_num_pages =
639-
min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
640-
PAGE_SIZE / sizeof(struct page *));
639+
const size_t gup_num_pages = min_t(size_t,
640+
(bcnt + BIT(page_shift) - 1) >> page_shift,
641+
PAGE_SIZE / sizeof(struct page *));
641642

642643
down_read(&owning_mm->mmap_sem);
643644
/*
@@ -656,14 +657,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
656657
break;
657658

658659
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
659-
user_virt += npages << PAGE_SHIFT;
660660
mutex_lock(&umem->odp_data->umem_mutex);
661-
for (j = 0; j < npages; ++j) {
661+
for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
662+
if (user_virt & ~page_mask) {
663+
p += PAGE_SIZE;
664+
if (page_to_phys(local_page_list[j]) != p) {
665+
ret = -EFAULT;
666+
break;
667+
}
668+
put_page(local_page_list[j]);
669+
continue;
670+
}
671+
662672
ret = ib_umem_odp_map_dma_single_page(
663-
umem, k, base_virt_addr, local_page_list[j],
664-
access_mask, current_seq);
673+
umem, k, local_page_list[j],
674+
access_mask, current_seq);
665675
if (ret < 0)
666676
break;
677+
678+
p = page_to_phys(local_page_list[j]);
667679
k++;
668680
}
669681
mutex_unlock(&umem->odp_data->umem_mutex);
@@ -708,7 +720,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
708720
* once. */
709721
mutex_lock(&umem->odp_data->umem_mutex);
710722
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
711-
idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
723+
idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
712724
if (umem->odp_data->page_list[idx]) {
713725
struct page *page = umem->odp_data->page_list[idx];
714726
dma_addr_t dma = umem->odp_data->dma_list[idx];

include/rdma/ib_umem.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,12 @@ static inline unsigned long ib_umem_start(struct ib_umem *umem)
7272
/* Returns the address of the page after the last one of an ODP umem. */
7373
static inline unsigned long ib_umem_end(struct ib_umem *umem)
7474
{
75-
return PAGE_ALIGN(umem->address + umem->length);
75+
return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
7676
}
7777

7878
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
7979
{
80-
return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
80+
return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
8181
}
8282

8383
#ifdef CONFIG_INFINIBAND_USER_MEM

0 commit comments

Comments
 (0)