@@ -326,6 +326,25 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
 	return gpe;
 }
 
+/*
+ * NPU2 ATS
+ */
+/* Maximum possible number of ATSD MMIO registers per NPU */
+#define NV_NMMU_ATSD_REGS 8
+
+/* An NPU descriptor, valid for POWER9 only */
+struct npu {
+	int index;
+	__be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
+	unsigned int mmio_atsd_count;
+
+	/* Bitmask for MMIO register usage */
+	unsigned long mmio_atsd_usage;
+
+	/* Do we need to explicitly flush the nest mmu? */
+	bool nmmu_flush;
+};
+
 /* Maximum number of nvlinks per npu */
 #define NV_MAX_LINKS 6
 
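The mmio_atsd_usage field added above is the free/busy bitmask behind get_mmio_atsd_reg(), which acquire_atsd_reg() calls in the next hunk. The actual helpers in npu-dma.c are not shown in this diff, so the sketch below is illustrative only: a minimal bitmask allocator over the fields declared above, using the standard bitops helpers (put_mmio_atsd_reg() here is an assumed counterpart, not introduced by this patch).

	/* Illustrative sketch, not part of this patch: claim one free ATSD slot. */
	static int get_mmio_atsd_reg(struct npu *npu)
	{
		int i;

		for (i = 0; i < npu->mmio_atsd_count; i++) {
			/* Atomically set bit i; a zero return means the slot was free. */
			if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
				return i;
		}

		return -ENOSPC;
	}

	/* Release a slot previously returned by get_mmio_atsd_reg(). */
	static void put_mmio_atsd_reg(struct npu *npu, int reg)
	{
		clear_bit_unlock(reg, &npu->mmio_atsd_usage);
	}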
@@ -477,7 +496,6 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
 	int i, j;
 	struct npu *npu;
 	struct pci_dev *npdev;
-	struct pnv_phb *nphb;
 
 	for (i = 0; i <= max_npu2_index; i++) {
 		mmio_atsd_reg[i].reg = -1;
@@ -492,8 +510,7 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
 		if (!npdev)
 			continue;
 
-		nphb = pci_bus_to_host(npdev->bus)->private_data;
-		npu = &nphb->npu;
+		npu = pci_bus_to_host(npdev->bus)->npu;
 		mmio_atsd_reg[i].npu = npu;
 		mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
 		while (mmio_atsd_reg[i].reg < 0) {
@@ -661,6 +678,7 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	struct pnv_phb *nphb;
 	struct npu *npu;
 	struct npu_context *npu_context;
+	struct pci_controller *hose;
 
 	/*
 	 * At present we don't support GPUs connected to multiple NPUs and I'm
@@ -688,8 +706,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		return ERR_PTR(-EINVAL);
 	}
 
-	nphb = pci_bus_to_host(npdev->bus)->private_data;
-	npu = &nphb->npu;
+	hose = pci_bus_to_host(npdev->bus);
+	nphb = hose->private_data;
+	npu = hose->npu;
 
 	/*
 	 * Setup the NPU context table for a particular GPU. These need to be
@@ -763,7 +782,7 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	 */
 	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
 
-	if (!nphb->npu.nmmu_flush) {
+	if (!npu->nmmu_flush) {
 		/*
 		 * If we're not explicitly flushing ourselves we need to mark
 		 * the thread for global flushes
@@ -801,15 +820,17 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
 	struct device_node *nvlink_dn;
 	u32 nvlink_index;
+	struct pci_controller *hose;
 
 	if (WARN_ON(!npdev))
 		return;
 
 	if (!firmware_has_feature(FW_FEATURE_OPAL))
 		return;
 
-	nphb = pci_bus_to_host(npdev->bus)->private_data;
-	npu = &nphb->npu;
+	hose = pci_bus_to_host(npdev->bus);
+	nphb = hose->private_data;
+	npu = hose->npu;
 	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
@@ -887,9 +908,15 @@ int pnv_npu2_init(struct pnv_phb *phb)
 	struct pci_dev *gpdev;
 	static int npu_index;
 	uint64_t rc = 0;
+	struct pci_controller *hose = phb->hose;
+	struct npu *npu;
+	int ret;
+
+	npu = kzalloc(sizeof(*npu), GFP_KERNEL);
+	if (!npu)
+		return -ENOMEM;
 
-	phb->npu.nmmu_flush =
-		of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
+	npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
 	for_each_child_of_node(phb->hose->dn, dn) {
 		gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
 		if (gpdev) {
@@ -903,18 +930,29 @@ int pnv_npu2_init(struct pnv_phb *phb)
 		}
 	}
 
-	for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd",
+	for (i = 0; !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
 						i, &mmio_atsd); i++)
-		phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
+		npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
 
-	pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i);
-	phb->npu.mmio_atsd_count = i;
-	phb->npu.mmio_atsd_usage = 0;
+	pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
+	npu->mmio_atsd_count = i;
+	npu->mmio_atsd_usage = 0;
 	npu_index++;
-	if (WARN_ON(npu_index >= NV_MAX_NPUS))
-		return -ENOSPC;
+	if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
+		ret = -ENOSPC;
+		goto fail_exit;
+	}
 	max_npu2_index = npu_index;
-	phb->npu.index = npu_index;
+	npu->index = npu_index;
+	hose->npu = npu;
 
 	return 0;
+
+fail_exit:
+	for (i = 0; i < npu->mmio_atsd_count; ++i)
+		iounmap(npu->mmio_atsd_regs[i]);
+
+	kfree(npu);
+
+	return ret;
 }
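Net effect of the last hunk: pnv_npu2_init() now allocates the NPU descriptor with kzalloc() and publishes it as hose->npu instead of filling in the npu struct embedded in pnv_phb, and the error path unmaps the ioremap()ed ATSD registers and frees the allocation. Code that previously went through the powernv-specific pnv_phb can now reach the descriptor from the generic pci_controller, roughly as in the hypothetical helper below (this assumes the npu pointer is added to struct pci_controller elsewhere in this series; npdev_to_npu() is an illustrative name, not a function introduced by this patch).

	/* Hypothetical lookup: NPU descriptor from an NVLink PCI device. */
	static struct npu *npdev_to_npu(struct pci_dev *npdev)
	{
		struct pci_controller *hose = pci_bus_to_host(npdev->bus);

		return hose ? hose->npu : NULL;
	}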