@@ -410,6 +410,11 @@ struct npu_context {
 	void *priv;
 };

+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
 /*
  * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
  * if none are available.
@@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu)
 	int i;

 	for (i = 0; i < npu->mmio_atsd_count; i++) {
-		if (!test_and_set_bit(i, &npu->mmio_atsd_usage))
+		if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
 			return i;
 	}

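
The switch from test_and_set_bit() to test_and_set_bit_lock() (paired with clear_bit_unlock() in put_mmio_atsd_reg() below) turns each bit of mmio_atsd_usage into a small bit lock with acquire/release ordering, so writes made while a register was held are visible before the bit reads as free again. A minimal userspace sketch of the same pattern using C11 atomics; the names and the register count are illustrative, not taken from the kernel source:

#include <stdatomic.h>

#define NUM_REGS 8

static _Atomic unsigned long usage;	/* one bit per ATSD register */

/* Analogue of get_mmio_atsd_reg(): claim a free bit with acquire semantics. */
static int get_reg(void)
{
	for (int i = 0; i < NUM_REGS; i++) {
		unsigned long bit = 1UL << i;

		if (!(atomic_fetch_or_explicit(&usage, bit,
					       memory_order_acquire) & bit))
			return i;	/* bit was clear: we own register i */
	}
	return -1;			/* the kernel returns -ENOSPC here */
}

/* Analogue of put_mmio_atsd_reg(): drop the bit with release semantics. */
static void put_reg(int reg)
{
	atomic_fetch_and_explicit(&usage, ~(1UL << reg), memory_order_release);
}

The release ordering in put_reg() guarantees that everything written while the register was held is visible before the next acquirer sees the bit as free.
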
@@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu)

 static void put_mmio_atsd_reg(struct npu *npu, int reg)
 {
-	clear_bit(reg, &npu->mmio_atsd_usage);
+	clear_bit_unlock(reg, &npu->mmio_atsd_usage);
 }

 /* MMIO ATSD register offsets */
 #define XTS_ATSD_AVA  1
 #define XTS_ATSD_STAT 2

-static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
-				unsigned long va)
+static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
+				unsigned long launch, unsigned long va)
 {
-	int mmio_atsd_reg;
-
-	do {
-		mmio_atsd_reg = get_mmio_atsd_reg(npu);
-		cpu_relax();
-	} while (mmio_atsd_reg < 0);
+	struct npu *npu = mmio_atsd_reg->npu;
+	int reg = mmio_atsd_reg->reg;

 	__raw_writeq(cpu_to_be64(va),
-		npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA);
+		npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
 	eieio();
-	__raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]);
-
-	return mmio_atsd_reg;
+	__raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]);
 }

-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+				unsigned long pid, bool flush)
 {
+	int i;
 	unsigned long launch;

-	/* IS set to invalidate matching PID */
-	launch = PPC_BIT(12);
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* IS set to invalidate matching PID */
+		launch = PPC_BIT(12);

-	/* PRS set to process-scoped */
-	launch |= PPC_BIT(13);
+		/* PRS set to process-scoped */
+		launch |= PPC_BIT(13);

-	/* AP */
-	launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+		/* AP */
+		launch |= (u64)
+			mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);

-	/* PID */
-	launch |= pid << PPC_BITLSHIFT(38);
+		/* PID */
+		launch |= pid << PPC_BITLSHIFT(38);

-	/* No flush */
-	launch |= !flush << PPC_BITLSHIFT(39);
+		/* No flush */
+		launch |= !flush << PPC_BITLSHIFT(39);

-	/* Invalidating the entire process doesn't use a va */
-	return mmio_launch_invalidate(npu, launch, 0);
+		/* Invalidating the entire process doesn't use a va */
+		mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
+	}
 }

-static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid, bool flush)
+static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+			unsigned long va, unsigned long pid, bool flush)
 {
+	int i;
 	unsigned long launch;

-	/* IS set to invalidate target VA */
-	launch = 0;
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* IS set to invalidate target VA */
+		launch = 0;

-	/* PRS set to process scoped */
-	launch |= PPC_BIT(13);
+		/* PRS set to process scoped */
+		launch |= PPC_BIT(13);

-	/* AP */
-	launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+		/* AP */
+		launch |= (u64)
+			mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);

-	/* PID */
-	launch |= pid << PPC_BITLSHIFT(38);
+		/* PID */
+		launch |= pid << PPC_BITLSHIFT(38);

-	/* No flush */
-	launch |= !flush << PPC_BITLSHIFT(39);
+		/* No flush */
+		launch |= !flush << PPC_BITLSHIFT(39);

-	return mmio_launch_invalidate(npu, launch, va);
+		mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
+	}
 }

 #define mn_to_npu_context(x)	container_of(x, struct npu_context, mn)

-struct mmio_atsd_reg {
-	struct npu *npu;
-	int reg;
-};
-
 static void mmio_invalidate_wait(
-	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 {
 	struct npu *npu;
 	int i, reg;
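
The launch doubleword uses IBM big-endian bit numbering: PPC_BIT(b) is 1UL << (63 - b) and PPC_BITLSHIFT(b) is 63 - b, i.e. bit 0 is the most significant bit. A self-contained sketch of how the PID-scoped launch value is assembled; the AP value is a placeholder (the driver derives the real encoding from mmu_get_ap(mmu_virtual_psize)) and the macros are re-stated here rather than taken from asm/bitops.h:

#include <stdint.h>
#include <stdio.h>

/* IBM bit numbering: bit 0 is the MSB of the 64-bit doubleword. */
#define PPC_BITLSHIFT(be)	(63 - (be))
#define PPC_BIT(bit)		(1ULL << PPC_BITLSHIFT(bit))

int main(void)
{
	uint64_t launch, pid = 42, ap = 0;	/* ap value is illustrative */
	int flush = 0;

	launch  = PPC_BIT(12);				/* IS: match PID */
	launch |= PPC_BIT(13);				/* PRS: process-scoped */
	launch |= ap << PPC_BITLSHIFT(17);		/* AP: page-size field */
	launch |= pid << PPC_BITLSHIFT(38);		/* PID */
	launch |= (uint64_t)!flush << PPC_BITLSHIFT(39);	/* no-flush bit */

	printf("launch = 0x%016llx\n", (unsigned long long)launch);
	return 0;
}
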
@@ -522,16 +531,67 @@ static void mmio_invalidate_wait(
 		reg = mmio_atsd_reg[i].reg;
 		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
 			cpu_relax();
+	}
+}
+
+/*
+ * Acquires all the address translation shootdown (ATSD) registers required to
+ * launch an ATSD on all links this npu_context is active on.
+ */
+static void acquire_atsd_reg(struct npu_context *npu_context,
+			struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+	int i, j;
+	struct npu *npu;
+	struct pci_dev *npdev;
+	struct pnv_phb *nphb;

-		put_mmio_atsd_reg(npu, reg);
+	for (i = 0; i <= max_npu2_index; i++) {
+		mmio_atsd_reg[i].reg = -1;
+		for (j = 0; j < NV_MAX_LINKS; j++) {
+			/*
+			 * There are no ordering requirements with respect to
+			 * the setup of struct npu_context, but to ensure
+			 * consistent behaviour we need to ensure npdev[][] is
+			 * only read once.
+			 */
+			npdev = READ_ONCE(npu_context->npdev[i][j]);
+			if (!npdev)
+				continue;

+			nphb = pci_bus_to_host(npdev->bus)->private_data;
+			npu = &nphb->npu;
+			mmio_atsd_reg[i].npu = npu;
+			mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+			while (mmio_atsd_reg[i].reg < 0) {
+				mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+				cpu_relax();
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * Release previously acquired ATSD registers. To avoid deadlocks the registers
+ * must be released in the same order they were acquired above in
+ * acquire_atsd_reg.
+ */
+static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+	int i;
+
+	for (i = 0; i <= max_npu2_index; i++) {
 		/*
-		 * The GPU requires two flush ATSDs to ensure all entries have
-		 * been flushed. We use PID 0 as it will never be used for a
-		 * process on the GPU.
+		 * We can't rely on npu_context->npdev[][] being the same here
+		 * as when acquire_atsd_reg() was called, hence we use the
+		 * values stored in mmio_atsd_reg during the acquire phase
+		 * rather than re-reading npdev[][].
 		 */
-		if (flush)
-			mmio_invalidate_pid(npu, 0, true);
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
 	}
 }
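
acquire_atsd_reg() claims one register per NPU in ascending NPU-index order, spinning until one frees up, and holds all of them across the whole launch/wait sequence; release_atsd_reg() then returns them in that same order. Compared with the old code, which re-acquired a register mid-flight while still holding registers on other NPUs, this removes the hold-and-wait cycle that could deadlock two concurrent invalidations. A simplified, runnable model of the ordered acquire/release discipline (thread and register counts and all helper names here are invented for the sketch):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NUM_NPUS 2
#define NUM_REGS 2			/* per NPU; real hardware has more */

static _Atomic unsigned long usage[NUM_NPUS];

static int get_reg(int npu)
{
	for (int i = 0; i < NUM_REGS; i++) {
		unsigned long bit = 1UL << i;

		if (!(atomic_fetch_or_explicit(&usage[npu], bit,
					       memory_order_acquire) & bit))
			return i;
	}
	return -1;
}

static void put_reg(int npu, int reg)
{
	atomic_fetch_and_explicit(&usage[npu], ~(1UL << reg),
				  memory_order_release);
}

static void *invalidate(void *arg)
{
	int reg[NUM_NPUS];

	/* Acquire phase: always walk the NPUs in ascending index order. */
	for (int i = 0; i < NUM_NPUS; i++)
		while ((reg[i] = get_reg(i)) < 0)
			;	/* cpu_relax() in the kernel */

	/* ...launch the shootdowns and wait for completion here... */

	/* Release phase: same order as acquisition. */
	for (int i = 0; i < NUM_NPUS; i++)
		put_reg(i, reg[i]);
	return arg;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, invalidate, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	puts("all invalidations completed without deadlock");
	return 0;
}
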
@@ -542,10 +602,6 @@ static void mmio_invalidate_wait(
 static void mmio_invalidate(struct npu_context *npu_context, int va,
 			unsigned long address, bool flush)
 {
-	int i, j;
-	struct npu *npu;
-	struct pnv_phb *nphb;
-	struct pci_dev *npdev;
 	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;

@@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 * Loop over all the NPUs this process is active on and launch
 	 * an invalidate.
 	 */
-	for (i = 0; i <= max_npu2_index; i++) {
-		mmio_atsd_reg[i].reg = -1;
-		for (j = 0; j < NV_MAX_LINKS; j++) {
-			npdev = npu_context->npdev[i][j];
-			if (!npdev)
-				continue;
-
-			nphb = pci_bus_to_host(npdev->bus)->private_data;
-			npu = &nphb->npu;
-			mmio_atsd_reg[i].npu = npu;
-
-			if (va)
-				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid,
-							flush);
-			else
-				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid, flush);
-
-			/*
-			 * The NPU hardware forwards the shootdown to all GPUs
-			 * so we only have to launch one shootdown per NPU.
-			 */
-			break;
-		}
+	acquire_atsd_reg(npu_context, mmio_atsd_reg);
+	if (va)
+		mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+	else
+		mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+
+	mmio_invalidate_wait(mmio_atsd_reg);
+	if (flush) {
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+		mmio_invalidate_wait(mmio_atsd_reg);
+		mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+		mmio_invalidate_wait(mmio_atsd_reg);
 	}
-
-	mmio_invalidate_wait(mmio_atsd_reg, flush);
-	if (flush)
-		/* Wait for the flush to complete */
-		mmio_invalidate_wait(mmio_atsd_reg, false);
+	release_atsd_reg(mmio_atsd_reg);
 }

 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
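
With the helpers above, mmio_invalidate() brackets each invalidation in a single acquire/release pair and keeps the registers held across the two PID-0 flush ATSDs (PID 0 is safe because it is never used by a process on the GPU, per the comment in the hunk above). A runnable skeleton of that control flow; every function body below is a stub standing in for the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

static void acquire_atsd_reg(void) { puts("acquire all ATSD regs, in order"); }
static void release_atsd_reg(void) { puts("release regs, same order"); }
static void launch_pid(unsigned long pid) { printf("ATSD pid=%lu\n", pid); }
static void wait_done(void) { puts("poll XTS_ATSD_STAT until clear"); }

/* Registers stay held for the whole sequence: nothing re-acquired mid-flight. */
static void invalidate_model(unsigned long pid, bool flush)
{
	acquire_atsd_reg();
	launch_pid(pid);
	wait_done();
	if (flush) {
		/* The GPU requires two flush ATSDs with PID 0. */
		launch_pid(0);
		wait_done();
		launch_pid(0);
		wait_done();
	}
	release_atsd_reg();
}

int main(void)
{
	invalidate_model(42, true);
	return 0;
}
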
@@ -726,7 +770,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
 		return ERR_PTR(-ENODEV);
-	npu_context->npdev[npu->index][nvlink_index] = npdev;
+
+	/*
+	 * npdev is a pci_dev pointer setup by the PCI code. We assign it to
+	 * npdev[][] to indicate to the mmu notifiers that an invalidation
+	 * should also be sent over this nvlink. The notifiers don't use any
+	 * other fields in npu_context, so we just need to ensure that when they
+	 * dereference npu_context->npdev[][] it is either a valid pointer or
+	 * NULL.
+	 */
+	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);

 	if (!nphb->npu.nmmu_flush) {
 		/*
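
The bare stores to npdev[][] become WRITE_ONCE(), pairing with the READ_ONCE() in acquire_atsd_reg(): a notifier racing with context setup or teardown must observe either a valid pointer or NULL, never a torn or twice-read value. In userspace C11, relaxed atomic loads and stores give a comparable single-access guarantee; the struct and function names below are stand-ins, not kernel code:

#include <stdatomic.h>
#include <stdio.h>

struct pci_dev_stub { int id; };	/* stand-in for struct pci_dev */

/* The published slot: always either a valid pointer or NULL. */
static struct pci_dev_stub *_Atomic npdev_slot;

/* Writer side, as in pnv_npu2_init_context()/pnv_npu2_destroy_context(). */
static void publish(struct pci_dev_stub *dev)
{
	/* WRITE_ONCE() analogue: one untorn store. */
	atomic_store_explicit(&npdev_slot, dev, memory_order_relaxed);
}

/* Reader side, as in acquire_atsd_reg(): read the slot exactly once. */
static void notifier_pass(void)
{
	struct pci_dev_stub *dev =
		atomic_load_explicit(&npdev_slot, memory_order_relaxed);

	if (dev)	/* every later use sees this same snapshot */
		printf("send invalidate over link of dev %d\n", dev->id);
}

int main(void)
{
	static struct pci_dev_stub gpu = { .id = 1 };

	publish(&gpu);
	notifier_pass();
	publish(NULL);	/* destroy: notifiers now skip this link */
	notifier_pass();
	return 0;
}
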
@@ -778,7 +831,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
 		return;
-	npu_context->npdev[npu->index][nvlink_index] = NULL;
+	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
 	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
 		PCI_DEVID(gpdev->bus->number, gpdev->devfn));
 	kref_put(&npu_context->kref, pnv_npu2_release_context);