
Commit c10c21e

aik authored and mpe committed
powerpc/vfio/iommu/kvm: Do not pin device memory
This new memory does not have page structs as it is not plugged to the host so gup() will fail anyway.

This adds 2 helpers:
- mm_iommu_newdev() to preregister the "memory device" memory so the rest of the API can still be used;
- mm_iommu_is_devmem() to know if the physical address is one of these new regions which we must avoid unpinning of.

This adds @mm to tce_page_is_contained() and iommu_tce_xchg() to test if the memory is device memory to avoid pfn_to_page().

This adds a check for device memory in mm_iommu_ua_mark_dirty_rm() which does delayed page dirtying.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
1 parent e0bf78b · commit c10c21e
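To illustrate how the two new helpers fit together, here is a minimal caller sketch. This is hypothetical code, not part of this commit: the example_* function names are made up for illustration, and the actual consumer of these helpers would presumably be the VFIO SPAPR TCE code referenced in the commit title.

/*
 * Hypothetical usage sketch only; not from this commit.
 * example_prereg_devmem() and example_mark_dirty() are illustrative names.
 */
#include <linux/mm.h>
#include <asm/mmu_context.h>

/*
 * Preregister device memory that has no page structs, so nothing is pinned;
 * ua->hpa translation via the mm_iommu_* API keeps working for the region.
 */
static long example_prereg_devmem(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_newdev(mm, ua, entries, dev_hpa, pmem);
}

/*
 * Mirrors the iommu_tce_xchg() change below: only mark the page dirty when
 * the host physical address is ordinary RAM, since device memory has no
 * struct page to pass to pfn_to_page()/SetPageDirty().
 */
static void example_mark_dirty(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift)
{
	unsigned long size = 0;

	if (!mm_iommu_is_devmem(mm, hpa, pageshift, &size))
		SetPageDirty(pfn_to_page(hpa >> PAGE_SHIFT));
}

The same devmem check appears in the iommu_tce_xchg() and mm_iommu_ua_mark_dirty_rm() hunks in the diffs below.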

File tree: 6 files changed (+135, -32 lines)

arch/powerpc/include/asm/iommu.h

Lines changed: 3 additions & 2 deletions
@@ -218,8 +218,9 @@ extern void iommu_register_group(struct iommu_table_group *table_group,
 extern int iommu_add_device(struct device *dev);
 extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
-extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction);
+extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
 		int pci_domain_number,

arch/powerpc/include/asm/mmu_context.h

Lines changed: 11 additions & 0 deletions
@@ -24,6 +24,9 @@ extern bool mm_iommu_preregistered(struct mm_struct *mm);
 extern long mm_iommu_new(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries,
 		struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
+		struct mm_iommu_table_group_mem_t **pmem);
 extern long mm_iommu_put(struct mm_struct *mm,
 		struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_init(struct mm_struct *mm);
@@ -39,8 +42,16 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
+extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+#else
+static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size)
+{
+	return false;
+}
 #endif
 extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
 extern void set_context(unsigned long id, pgd_t *pgd);

arch/powerpc/kernel/iommu.c

Lines changed: 8 additions & 3 deletions
@@ -47,6 +47,7 @@
 #include <asm/fadump.h>
 #include <asm/vio.h>
 #include <asm/tce.h>
+#include <asm/mmu_context.h>
 
 #define DBG(...)
 
@@ -993,15 +994,19 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
 }
 EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
 
-long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction)
+long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction)
 {
 	long ret;
+	unsigned long size = 0;
 
 	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
 
 	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
-			(*direction == DMA_BIDIRECTIONAL)))
+			(*direction == DMA_BIDIRECTIONAL)) &&
+			!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
+					&size))
 		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
 
 	/* if (unlikely(ret))

arch/powerpc/kvm/book3s_64_vio.c

Lines changed: 10 additions & 8 deletions
@@ -397,12 +397,13 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
 	return H_SUCCESS;
 }
 
-static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry)
 {
 	unsigned long hpa = 0;
 	enum dma_data_direction dir = DMA_NONE;
 
-	iommu_tce_xchg(tbl, entry, &hpa, &dir);
+	iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
 }
 
 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -433,15 +434,15 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
 	unsigned long hpa = 0;
 	long ret;
 
-	if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
+	if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
 		return H_TOO_HARD;
 
 	if (dir == DMA_NONE)
 		return H_SUCCESS;
 
 	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
 	if (ret != H_SUCCESS)
-		iommu_tce_xchg(tbl, entry, &hpa, &dir);
+		iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
 
 	return ret;
 }
@@ -487,7 +488,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (mm_iommu_mapped_inc(mem))
 		return H_TOO_HARD;
 
-	ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
+	ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
 	if (WARN_ON_ONCE(ret)) {
 		mm_iommu_mapped_dec(mem);
 		return H_TOO_HARD;
@@ -566,7 +567,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 				entry, ua, dir);
 
 		if (ret != H_SUCCESS) {
-			kvmppc_clear_tce(stit->tbl, entry);
+			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
 			goto unlock_exit;
 		}
 	}
@@ -655,7 +656,8 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 					iommu_tce_direction(tce));
 
 			if (ret != H_SUCCESS) {
-				kvmppc_clear_tce(stit->tbl, entry);
+				kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
+						entry);
 				goto unlock_exit;
 			}
 		}
@@ -704,7 +706,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
 				return ret;
 
 			WARN_ON_ONCE(1);
-			kvmppc_clear_tce(stit->tbl, entry);
+			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
 		}
 	}
 

arch/powerpc/mm/mmu_context_iommu.c

Lines changed: 84 additions & 9 deletions
@@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t {
 	u64 ua;			/* userspace address */
 	u64 entries;		/* number of entries in hpas[] */
 	u64 *hpas;		/* vmalloc'ed */
+#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
+	u64 dev_hpa;		/* Device memory base address */
 };
 
 static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page)
 	return 0;
 }
 
-long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
 		struct mm_iommu_table_group_mem_t **pmem)
 {
 	struct mm_iommu_table_group_mem_t *mem;
@@ -150,18 +153,27 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 
 	}
 
-	ret = mm_iommu_adjust_locked_vm(mm, entries, true);
-	if (ret)
-		goto unlock_exit;
+	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+		if (ret)
+			goto unlock_exit;
 
-	locked_entries = entries;
+		locked_entries = entries;
+	}
 
 	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	if (!mem) {
 		ret = -ENOMEM;
 		goto unlock_exit;
 	}
 
+	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+		mem->dev_hpa = dev_hpa;
+		goto good_exit;
+	}
+	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
 	/*
 	 * For a starting point for a maximum page size calculation
 	 * we use @ua and @entries natural alignment to allow IOMMU pages
@@ -230,6 +242,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
 	}
 
+good_exit:
 	atomic64_set(&mem->mapped, 1);
 	mem->used = 1;
 	mem->ua = ua;
@@ -246,13 +259,31 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 
 	return ret;
 }
+
+long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+		struct mm_iommu_table_group_mem_t **pmem)
+{
+	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+			pmem);
+}
 EXPORT_SYMBOL_GPL(mm_iommu_new);
 
+long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
+		struct mm_iommu_table_group_mem_t **pmem)
+{
+	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_newdev);
+
 static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
 {
 	long i;
 	struct page *page = NULL;
 
+	if (!mem->hpas)
+		return;
+
 	for (i = 0; i < mem->entries; ++i) {
 		if (!mem->hpas[i])
 			continue;
@@ -294,6 +325,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
 long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 {
 	long ret = 0;
+	unsigned long entries, dev_hpa;
 
 	mutex_lock(&mem_list_mutex);
 
@@ -315,9 +347,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 	}
 
 	/* @mapped became 0 so now mappings are disabled, release the region */
+	entries = mem->entries;
+	dev_hpa = mem->dev_hpa;
 	mm_iommu_release(mem);
 
-	mm_iommu_adjust_locked_vm(mm, mem->entries, false);
+	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+		mm_iommu_adjust_locked_vm(mm, entries, false);
 
 unlock_exit:
 	mutex_unlock(&mem_list_mutex);
@@ -387,14 +422,20 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
-	u64 *va = &mem->hpas[entry];
+	u64 *va;
 
 	if (entry >= mem->entries)
 		return -EFAULT;
 
 	if (pageshift > mem->pageshift)
 		return -EFAULT;
 
+	if (!mem->hpas) {
+		*hpa = mem->dev_hpa + (ua - mem->ua);
+		return 0;
+	}
+
+	va = &mem->hpas[entry];
 	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
 
 	return 0;
@@ -405,7 +446,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
-	void *va = &mem->hpas[entry];
 	unsigned long *pa;
 
 	if (entry >= mem->entries)
@@ -414,7 +454,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 	if (pageshift > mem->pageshift)
 		return -EFAULT;
 
-	pa = (void *) vmalloc_to_phys(va);
+	if (!mem->hpas) {
+		*hpa = mem->dev_hpa + (ua - mem->ua);
+		return 0;
+	}
+
+	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
 	if (!pa)
 		return -EFAULT;
 
@@ -434,6 +479,9 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
 	if (!mem)
 		return;
 
+	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
+		return;
+
 	entry = (ua - mem->ua) >> PAGE_SHIFT;
 	va = &mem->hpas[entry];
 
@@ -444,6 +492,33 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
 	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
 }
 
+bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+	unsigned long end;
+
+	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+			continue;
+
+		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+			/*
+			 * Since the IOMMU page size might be bigger than
+			 * PAGE_SIZE, the amount of preregistered memory
+			 * starting from @hpa might be smaller than 1<<pageshift
+			 * and the caller needs to distinguish this situation.
+			 */
+			*size = min(1UL << pageshift, end - hpa);
+			return true;
+		}
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+
 long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
 {
 	if (atomic64_inc_not_zero(&mem->mapped))
