@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-			unsigned long address)
+			unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 		if (va)
 			mmio_atsd_reg[i].reg =
-				mmio_invalidate_va(npu, address, pid);
+				mmio_invalidate_va(npu, address, pid,
+						flush);
 		else
 			mmio_atsd_reg[i].reg =
-				mmio_invalidate_pid(npu, pid);
+				mmio_invalidate_pid(npu, pid, flush);
 
 		/*
 		 * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final address == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 