Skip to content

Commit bbd5ff5

Browse files
apopple authored and mpe committed
powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD
NPU2 requires an extra explicit flush to an active GPU PID when sending address translation shoot downs (ATSDs) to reliably flush the GPU TLB. This patch adds just such a flush at the end of each sequence of ATSDs. We can safely use PID 0 which is always reserved and active on the GPU. PID 0 is only used for init_mm which will never be a user mm on the GPU. To enforce this we add a check in pnv_npu2_init_context() just in case someone tries to use PID 0 on the GPU. Signed-off-by: Alistair Popple <alistair@popple.id.au> [mpe: Use true/false for bool literals] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
1 parent bf05fc2 commit bbd5ff5

File tree

1 file changed

+65
-29
lines changed

1 file changed

+65
-29
lines changed

arch/powerpc/platforms/powernv/npu-dma.c

Lines changed: 65 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
449449
return mmio_atsd_reg;
450450
}
451451

452-
static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
452+
static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
453453
{
454454
unsigned long launch;
455455

@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
465465
/* PID */
466466
launch |= pid << PPC_BITLSHIFT(38);
467467

468+
/* No flush */
469+
launch |= !flush << PPC_BITLSHIFT(39);
470+
468471
/* Invalidating the entire process doesn't use a va */
469472
return mmio_launch_invalidate(npu, launch, 0);
470473
}
471474

472475
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
473-
unsigned long pid)
476+
unsigned long pid, bool flush)
474477
{
475478
unsigned long launch;
476479

@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
486489
/* PID */
487490
launch |= pid << PPC_BITLSHIFT(38);
488491

492+
/* No flush */
493+
launch |= !flush << PPC_BITLSHIFT(39);
494+
489495
return mmio_launch_invalidate(npu, launch, va);
490496
}
491497

492498
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
493499

500+
struct mmio_atsd_reg {
501+
struct npu *npu;
502+
int reg;
503+
};
504+
505+
static void mmio_invalidate_wait(
506+
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
507+
{
508+
struct npu *npu;
509+
int i, reg;
510+
511+
/* Wait for all invalidations to complete */
512+
for (i = 0; i <= max_npu2_index; i++) {
513+
if (mmio_atsd_reg[i].reg < 0)
514+
continue;
515+
516+
/* Wait for completion */
517+
npu = mmio_atsd_reg[i].npu;
518+
reg = mmio_atsd_reg[i].reg;
519+
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
520+
cpu_relax();
521+
522+
put_mmio_atsd_reg(npu, reg);
523+
524+
/*
525+
* The GPU requires two flush ATSDs to ensure all entries have
526+
* been flushed. We use PID 0 as it will never be used for a
527+
* process on the GPU.
528+
*/
529+
if (flush)
530+
mmio_invalidate_pid(npu, 0, true);
531+
}
532+
}
533+
494534
/*
495535
* Invalidate either a single address or an entire PID depending on
496536
* the value of va.
497537
*/
498538
static void mmio_invalidate(struct npu_context *npu_context, int va,
499-
unsigned long address)
539+
unsigned long address, bool flush)
500540
{
501-
int i, j, reg;
541+
int i, j;
502542
struct npu *npu;
503543
struct pnv_phb *nphb;
504544
struct pci_dev *npdev;
505-
struct {
506-
struct npu *npu;
507-
int reg;
508-
} mmio_atsd_reg[NV_MAX_NPUS];
545+
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
509546
unsigned long pid = npu_context->mm->context.id;
510547

511548
/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
525562

526563
if (va)
527564
mmio_atsd_reg[i].reg =
528-
mmio_invalidate_va(npu, address, pid);
565+
mmio_invalidate_va(npu, address, pid,
566+
flush);
529567
else
530568
mmio_atsd_reg[i].reg =
531-
mmio_invalidate_pid(npu, pid);
569+
mmio_invalidate_pid(npu, pid, flush);
532570

533571
/*
534572
* The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
544582
*/
545583
flush_tlb_mm(npu_context->mm);
546584

547-
/* Wait for all invalidations to complete */
548-
for (i = 0; i <= max_npu2_index; i++) {
549-
if (mmio_atsd_reg[i].reg < 0)
550-
continue;
551-
552-
/* Wait for completion */
553-
npu = mmio_atsd_reg[i].npu;
554-
reg = mmio_atsd_reg[i].reg;
555-
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
556-
cpu_relax();
557-
put_mmio_atsd_reg(npu, reg);
558-
}
585+
mmio_invalidate_wait(mmio_atsd_reg, flush);
586+
if (flush)
587+
/* Wait for the flush to complete */
588+
mmio_invalidate_wait(mmio_atsd_reg, false);
559589
}
560590

561591
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
571601
* There should be no more translation requests for this PID, but we
572602
* need to ensure any entries for it are removed from the TLB.
573603
*/
574-
mmio_invalidate(npu_context, 0, 0);
604+
mmio_invalidate(npu_context, 0, 0, true);
575605
}
576606

577607
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
581611
{
582612
struct npu_context *npu_context = mn_to_npu_context(mn);
583613

584-
mmio_invalidate(npu_context, 1, address);
614+
mmio_invalidate(npu_context, 1, address, true);
585615
}
586616

587617
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
590620
{
591621
struct npu_context *npu_context = mn_to_npu_context(mn);
592622

593-
mmio_invalidate(npu_context, 1, address);
623+
mmio_invalidate(npu_context, 1, address, true);
594624
}
595625

596626
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
600630
struct npu_context *npu_context = mn_to_npu_context(mn);
601631
unsigned long address;
602632

603-
for (address = start; address <= end; address += PAGE_SIZE)
604-
mmio_invalidate(npu_context, 1, address);
633+
for (address = start; address < end; address += PAGE_SIZE)
634+
mmio_invalidate(npu_context, 1, address, false);
635+
636+
/* Do the flush only on the final address == end */
637+
mmio_invalidate(npu_context, 1, address, true);
605638
}
606639

607640
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
651684
/* No nvlink associated with this GPU device */
652685
return ERR_PTR(-ENODEV);
653686

654-
if (!mm) {
655-
/* kernel thread contexts are not supported */
687+
if (!mm || mm->context.id == 0) {
688+
/*
689+
* Kernel thread contexts are not supported and context id 0 is
690+
* reserved on the GPU.
691+
*/
656692
return ERR_PTR(-EINVAL);
657693
}
658694

0 commit comments

Comments
 (0)