
Commit c9990ab

jgunthorpe authored and dledford committed
RDMA/umem: Move all the ODP related stuff out of ucontext and into per_mm
This is the first step to make ODP use the owning_mm that is now part of
struct ib_umem.

Each ODP umem is linked to a single per_mm structure, which, in turn, is
linked to a single mm, via the embedded mmu_notifier. This first patch
introduces the structure and reworks everything to use it.

This also needs to introduce tgid into the ib_ucontext_per_mm, as
get_user_pages_remote() requires the originating task for statistics
tracking.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
1 parent 597ecc5 commit c9990ab
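For orientation, here is a rough sketch of what ib_ucontext_per_mm ends up holding, reconstructed only from the fields this diff touches plus the tgid mentioned in the commit message. The authoritative definition is in the header changes, which are among the three changed files not shown in this excerpt, so member order and comments below are guesses:

	/* Sketch only -- inferred from this diff, not copied from the headers. */
	struct ib_ucontext_per_mm {
		struct ib_ucontext *context;	/* back-pointer, set in ib_uverbs_get_context() */
		struct pid *tgid;		/* per the commit message: get_user_pages_remote()
						 * needs the originating task */

		struct rb_root_cached umem_tree;	/* interval tree of ODP umems */
		struct rw_semaphore umem_rwsem;		/* protects umem_tree and the counters */
		atomic_t notifier_count;		/* mmu notifiers currently running */
		struct list_head no_private_counters;
		int odp_mrs_count;

		struct mmu_notifier mn;	/* embedded; callbacks recover per_mm via container_of() */
	};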

5 files changed, 120 insertions(+), 93 deletions(-)


drivers/infiniband/core/umem_odp.c

Lines changed: 68 additions & 59 deletions
@@ -115,34 +115,35 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
 }
 
 /* Account for a new mmu notifier in an ib_ucontext. */
-static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
+static void
+ib_ucontext_notifier_start_account(struct ib_ucontext_per_mm *per_mm)
 {
-	atomic_inc(&context->notifier_count);
+	atomic_inc(&per_mm->notifier_count);
 }
 
 /* Account for a terminating mmu notifier in an ib_ucontext.
  *
  * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
  * the function takes the semaphore itself. */
-static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
+static void ib_ucontext_notifier_end_account(struct ib_ucontext_per_mm *per_mm)
 {
-	int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
+	int zero_notifiers = atomic_dec_and_test(&per_mm->notifier_count);
 
 	if (zero_notifiers &&
-	    !list_empty(&context->no_private_counters)) {
+	    !list_empty(&per_mm->no_private_counters)) {
 		/* No currently running mmu notifiers. Now is the chance to
 		 * add private accounting to all previously added umems. */
 		struct ib_umem_odp *odp_data, *next;
 
 		/* Prevent concurrent mmu notifiers from working on the
 		 * no_private_counters list. */
-		down_write(&context->umem_rwsem);
+		down_write(&per_mm->umem_rwsem);
 
 		/* Read the notifier_count again, with the umem_rwsem
 		 * semaphore taken for write. */
-		if (!atomic_read(&context->notifier_count)) {
+		if (!atomic_read(&per_mm->notifier_count)) {
 			list_for_each_entry_safe(odp_data, next,
-						 &context->no_private_counters,
+						 &per_mm->no_private_counters,
 						 no_private_counters) {
 				mutex_lock(&odp_data->umem_mutex);
 				odp_data->mn_counters_active = true;
@@ -152,7 +153,7 @@ static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
 			}
 		}
 
-		up_write(&context->umem_rwsem);
+		up_write(&per_mm->umem_rwsem);
 	}
 }
 
@@ -179,19 +180,20 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
 static void ib_umem_notifier_release(struct mmu_notifier *mn,
 				     struct mm_struct *mm)
 {
-	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+	struct ib_ucontext_per_mm *per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
 
-	if (!context->invalidate_range)
+	if (!per_mm->context->invalidate_range)
 		return;
 
-	ib_ucontext_notifier_start_account(context);
-	down_read(&context->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
+	ib_ucontext_notifier_start_account(per_mm);
+	down_read(&per_mm->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0,
 				      ULLONG_MAX,
 				      ib_umem_notifier_release_trampoline,
 				      true,
 				      NULL);
-	up_read(&context->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 }
 
 static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
@@ -217,23 +219,24 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    unsigned long end,
 						    bool blockable)
 {
-	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+	struct ib_ucontext_per_mm *per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
 	int ret;
 
-	if (!context->invalidate_range)
+	if (!per_mm->context->invalidate_range)
 		return 0;
 
 	if (blockable)
-		down_read(&context->umem_rwsem);
-	else if (!down_read_trylock(&context->umem_rwsem))
+		down_read(&per_mm->umem_rwsem);
+	else if (!down_read_trylock(&per_mm->umem_rwsem))
 		return -EAGAIN;
 
-	ib_ucontext_notifier_start_account(context);
-	ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+	ib_ucontext_notifier_start_account(per_mm);
+	ret = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
 					    end,
 					    invalidate_range_start_trampoline,
 					    blockable, NULL);
-	up_read(&context->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 
 	return ret;
 }
@@ -250,22 +253,23 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 						  unsigned long start,
 						  unsigned long end)
 {
-	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+	struct ib_ucontext_per_mm *per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
 
-	if (!context->invalidate_range)
+	if (!per_mm->context->invalidate_range)
 		return;
 
 	/*
	 * TODO: we currently bail out if there is any sleepable work to be done
	 * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
	 * here. But this is ugly and fragile.
	 */
-	down_read(&context->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+	down_read(&per_mm->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
 				      end,
 				      invalidate_range_end_trampoline, true, NULL);
-	up_read(&context->umem_rwsem);
-	ib_ucontext_notifier_end_account(context);
+	up_read(&per_mm->umem_rwsem);
+	ib_ucontext_notifier_end_account(per_mm);
 }
 
 static const struct mmu_notifier_ops ib_umem_notifiers = {
@@ -277,6 +281,7 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
 struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context,
 				      unsigned long addr, size_t size)
 {
+	struct ib_ucontext_per_mm *per_mm;
 	struct ib_umem_odp *odp_data;
 	struct ib_umem *umem;
 	int pages = size >> PAGE_SHIFT;
@@ -292,6 +297,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context,
 	umem->page_shift = PAGE_SHIFT;
 	umem->writable = 1;
 	umem->is_odp = 1;
+	odp_data->per_mm = per_mm = &context->per_mm;
 
 	mutex_init(&odp_data->umem_mutex);
 	init_completion(&odp_data->notifier_completion);
@@ -310,15 +316,15 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context,
 		goto out_page_list;
 	}
 
-	down_write(&context->umem_rwsem);
-	context->odp_mrs_count++;
-	rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
-	if (likely(!atomic_read(&context->notifier_count)))
+	down_write(&per_mm->umem_rwsem);
+	per_mm->odp_mrs_count++;
+	rbt_ib_umem_insert(&odp_data->interval_tree, &per_mm->umem_tree);
+	if (likely(!atomic_read(&per_mm->notifier_count)))
 		odp_data->mn_counters_active = true;
 	else
 		list_add(&odp_data->no_private_counters,
-			 &context->no_private_counters);
-	up_write(&context->umem_rwsem);
+			 &per_mm->no_private_counters);
+	up_write(&per_mm->umem_rwsem);
 
 	return odp_data;
 
@@ -334,6 +340,7 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 {
 	struct ib_ucontext *context = umem_odp->umem.context;
 	struct ib_umem *umem = &umem_odp->umem;
+	struct ib_ucontext_per_mm *per_mm;
 	int ret_val;
 	struct pid *our_pid;
 	struct mm_struct *mm = get_task_mm(current);
@@ -396,36 +403,38 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	 * notification before the "current" task (and MM) is
 	 * destroyed. We use the umem_rwsem semaphore to synchronize.
 	 */
-	down_write(&context->umem_rwsem);
-	context->odp_mrs_count++;
+	umem_odp->per_mm = per_mm = &context->per_mm;
+
+	down_write(&per_mm->umem_rwsem);
+	per_mm->odp_mrs_count++;
 	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
 		rbt_ib_umem_insert(&umem_odp->interval_tree,
-				   &context->umem_tree);
-	if (likely(!atomic_read(&context->notifier_count)) ||
-	    context->odp_mrs_count == 1)
+				   &per_mm->umem_tree);
+	if (likely(!atomic_read(&per_mm->notifier_count)) ||
+	    per_mm->odp_mrs_count == 1)
 		umem_odp->mn_counters_active = true;
 	else
 		list_add(&umem_odp->no_private_counters,
-			 &context->no_private_counters);
-	downgrade_write(&context->umem_rwsem);
+			 &per_mm->no_private_counters);
+	downgrade_write(&per_mm->umem_rwsem);
 
-	if (context->odp_mrs_count == 1) {
+	if (per_mm->odp_mrs_count == 1) {
 		/*
 		 * Note that at this point, no MMU notifier is running
-		 * for this context!
+		 * for this per_mm!
 		 */
-		atomic_set(&context->notifier_count, 0);
-		INIT_HLIST_NODE(&context->mn.hlist);
-		context->mn.ops = &ib_umem_notifiers;
-		ret_val = mmu_notifier_register(&context->mn, mm);
+		atomic_set(&per_mm->notifier_count, 0);
+		INIT_HLIST_NODE(&per_mm->mn.hlist);
+		per_mm->mn.ops = &ib_umem_notifiers;
+		ret_val = mmu_notifier_register(&per_mm->mn, mm);
 		if (ret_val) {
 			pr_err("Failed to register mmu_notifier %d\n", ret_val);
 			ret_val = -EBUSY;
 			goto out_mutex;
 		}
 	}
 
-	up_read(&context->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 
 	/*
 	 * Note that doing an mmput can cause a notifier for the relevant mm.
@@ -437,7 +446,7 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	return 0;
 
 out_mutex:
-	up_read(&context->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 	vfree(umem_odp->dma_list);
 out_page_list:
 	vfree(umem_odp->page_list);
@@ -449,7 +458,7 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 {
 	struct ib_umem *umem = &umem_odp->umem;
-	struct ib_ucontext *context = umem->context;
+	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	/*
 	 * Ensure that no more pages are mapped in the umem.
@@ -460,11 +469,11 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
 				    ib_umem_end(umem));
 
-	down_write(&context->umem_rwsem);
+	down_write(&per_mm->umem_rwsem);
 	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
 		rbt_ib_umem_remove(&umem_odp->interval_tree,
-				   &context->umem_tree);
-	context->odp_mrs_count--;
+				   &per_mm->umem_tree);
+	per_mm->odp_mrs_count--;
 	if (!umem_odp->mn_counters_active) {
 		list_del(&umem_odp->no_private_counters);
 		complete_all(&umem_odp->notifier_completion);
@@ -477,13 +486,13 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 	 * that since we are doing it atomically, no other user could register
 	 * and unregister while we do the check.
 	 */
-	downgrade_write(&context->umem_rwsem);
-	if (!context->odp_mrs_count) {
+	downgrade_write(&per_mm->umem_rwsem);
+	if (!per_mm->odp_mrs_count) {
 		struct task_struct *owning_process = NULL;
 		struct mm_struct *owning_mm = NULL;
 
-		owning_process = get_pid_task(context->tgid,
-					      PIDTYPE_PID);
+		owning_process =
+			get_pid_task(umem_odp->umem.context->tgid, PIDTYPE_PID);
 		if (owning_process == NULL)
 			/*
 			 * The process is already dead, notifier were removed
@@ -498,15 +507,15 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 			 * removed already.
 			 */
 			goto out_put_task;
-		mmu_notifier_unregister(&context->mn, owning_mm);
+		mmu_notifier_unregister(&per_mm->mn, owning_mm);
 
 		mmput(owning_mm);
 
 out_put_task:
 		put_task_struct(owning_process);
 	}
 out:
-	up_read(&context->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 
 	vfree(umem_odp->dma_list);
 	vfree(umem_odp->page_list);
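The container_of() idiom that repeats through the hunks above is what embedding the mmu_notifier in per_mm buys: the notifier core hands each callback only the struct mmu_notifier it registered, and pointer arithmetic recovers the enclosing structure. A minimal userspace sketch of the same recovery pattern, with invented names (an illustration only, not kernel code):

	#include <stddef.h>
	#include <stdio.h>

	/* Same definition the kernel uses, minus the type-checking sugar. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct fake_notifier {
		int (*invalidate)(struct fake_notifier *mn);
	};

	struct fake_per_mm {
		int odp_mrs_count;
		struct fake_notifier mn;	/* embedded, like per_mm->mn */
	};

	static int fake_invalidate(struct fake_notifier *mn)
	{
		/* Recover the owning structure from the embedded member,
		 * exactly as the ib_umem_notifier_* callbacks above do. */
		struct fake_per_mm *per_mm =
			container_of(mn, struct fake_per_mm, mn);
		return per_mm->odp_mrs_count;
	}

	int main(void)
	{
		struct fake_per_mm per_mm = { .odp_mrs_count = 1 };

		per_mm.mn.invalidate = fake_invalidate;
		printf("%d\n", per_mm.mn.invalidate(&per_mm.mn));	/* prints 1 */
		return 0;
	}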

drivers/infiniband/core/uverbs_cmd.c

Lines changed: 5 additions & 4 deletions
@@ -124,10 +124,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	ucontext->cleanup_retryable = false;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	ucontext->umem_tree = RB_ROOT_CACHED;
-	init_rwsem(&ucontext->umem_rwsem);
-	ucontext->odp_mrs_count = 0;
-	INIT_LIST_HEAD(&ucontext->no_private_counters);
+	ucontext->per_mm.umem_tree = RB_ROOT_CACHED;
+	init_rwsem(&ucontext->per_mm.umem_rwsem);
+	ucontext->per_mm.odp_mrs_count = 0;
+	INIT_LIST_HEAD(&ucontext->per_mm.no_private_counters);
+	ucontext->per_mm.context = ucontext;
 
 	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
 		ucontext->invalidate_range = NULL;
