@@ -327,38 +327,24 @@ static struct iommu_table_group_ops pnv_pci_npu_ops = {
 	.unset_window = pnv_npu_unset_window,
 	.take_ownership = pnv_npu_take_ownership,
 };
-
-struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
-{
-	struct pnv_phb *phb = npe->phb;
-	struct pci_bus *pbus = phb->hose->bus;
-	struct pci_dev *npdev, *gpdev = NULL, *gptmp;
-	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
-
-	if (!gpe || !gpdev)
-		return NULL;
-
-	npe->table_group.ops = &pnv_pci_npu_ops;
-
-	list_for_each_entry(npdev, &pbus->devices, bus_list) {
-		gptmp = pnv_pci_get_gpu_dev(npdev);
-
-		if (gptmp != gpdev)
-			continue;
-
-		pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
-		iommu_group_add_device(gpe->table_group.group, &npdev->dev);
-	}
-
-	return gpe;
-}
 #endif /* !CONFIG_IOMMU_API */
 
 /*
  * NPU2 ATS
  */
 /* Maximum possible number of ATSD MMIO registers per NPU */
 #define NV_NMMU_ATSD_REGS 8
+#define NV_NPU_MAX_PE_NUM	16
+
+/*
+ * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
+ * up to 3 x (GPU + 2xNPUs) (POWER9).
+ */
+struct npu_comp {
+	struct iommu_table_group table_group;
+	int pe_num;
+	struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
+};
 
 /* An NPU descriptor, valid for POWER9 only */
 struct npu {
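
All of the compound-group callbacks added in the next hunk receive a pointer to the embedded iommu_table_group and recover the enclosing npu_comp with container_of(). A minimal userspace sketch of that pattern (the struct names here are simplified stand-ins, not the kernel types):

	/* Sketch only: mocked types, not the kernel's. */
	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct table_group { int dummy; };

	struct comp {
		struct table_group table_group;	/* embedded, as in struct npu_comp */
		int pe_num;
	};

	static void callback(struct table_group *tg)
	{
		/* Recover the enclosing struct from the embedded member. */
		struct comp *c = container_of(tg, struct comp, table_group);

		printf("pe_num = %d\n", c->pe_num);
	}

	int main(void)
	{
		struct comp c = { .pe_num = 3 };

		callback(&c.table_group);	/* prints "pe_num = 3" */
		return 0;
	}
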
@@ -371,8 +357,263 @@ struct npu {
 
 	/* Do we need to explicitly flush the nest mmu? */
 	bool nmmu_flush;
+
+	struct npu_comp npucomp;
 };
 
+#ifdef CONFIG_IOMMU_API
+static long pnv_npu_peers_create_table_userspace(
+		struct iommu_table_group *table_group,
+		int num, __u32 page_shift, __u64 window_size, __u32 levels,
+		struct iommu_table **ptbl)
+{
+	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+			table_group);
+
+	if (!npucomp->pe_num || !npucomp->pe[0] ||
+			!npucomp->pe[0]->table_group.ops ||
+			!npucomp->pe[0]->table_group.ops->create_table)
+		return -EFAULT;
+
+	return npucomp->pe[0]->table_group.ops->create_table(
+			&npucomp->pe[0]->table_group, num, page_shift,
+			window_size, levels, ptbl);
+}
+
+static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
+		int num, struct iommu_table *tbl)
+{
+	int i, j;
+	long ret = 0;
+	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+			table_group);
+
+	for (i = 0; i < npucomp->pe_num; ++i) {
+		struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+		if (!pe->table_group.ops->set_window)
+			continue;
+
+		ret = pe->table_group.ops->set_window(&pe->table_group,
+				num, tbl);
+		if (ret)
+			break;
+	}
+
+	if (ret) {
+		for (j = 0; j < i; ++j) {
+			struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+			if (!pe->table_group.ops->unset_window)
+				continue;
+
+			ret = pe->table_group.ops->unset_window(
+					&pe->table_group, num);
+			if (ret)
+				break;
+		}
+	} else {
+		table_group->tables[num] = iommu_tce_table_get(tbl);
+	}
+
+	return ret;
+}
+
+static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
+		int num)
+{
+	int i, j;
+	long ret = 0;
+	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+			table_group);
+
+	for (i = 0; i < npucomp->pe_num; ++i) {
+		struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+		WARN_ON(npucomp->table_group.tables[num] !=
+				table_group->tables[num]);
+		if (!npucomp->table_group.tables[num])
+			continue;
+
+		if (!pe->table_group.ops->unset_window)
+			continue;
+
+		ret = pe->table_group.ops->unset_window(&pe->table_group, num);
+		if (ret)
+			break;
+	}
+
+	if (ret) {
+		for (j = 0; j < i; ++j) {
+			struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+			if (!npucomp->table_group.tables[num])
+				continue;
+
+			if (!pe->table_group.ops->set_window)
+				continue;
+
+			ret = pe->table_group.ops->set_window(&pe->table_group,
+					num, table_group->tables[num]);
+			if (ret)
+				break;
+		}
+	} else if (table_group->tables[num]) {
+		iommu_tce_table_put(table_group->tables[num]);
+		table_group->tables[num] = NULL;
+	}
+
+	return ret;
+}
+
+static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
+{
+	int i;
+	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+			table_group);
+
+	for (i = 0; i < npucomp->pe_num; ++i) {
+		struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+		if (!pe->table_group.ops->take_ownership)
+			continue;
+		pe->table_group.ops->take_ownership(&pe->table_group);
+	}
+}
+
+static void pnv_npu_peers_release_ownership(
+		struct iommu_table_group *table_group)
+{
+	int i;
+	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+			table_group);
+
+	for (i = 0; i < npucomp->pe_num; ++i) {
+		struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+		if (!pe->table_group.ops->release_ownership)
+			continue;
+		pe->table_group.ops->release_ownership(&pe->table_group);
+	}
+}
+
+static struct iommu_table_group_ops pnv_npu_peers_ops = {
+	.get_table_size = pnv_pci_ioda2_get_table_size,
+	.create_table = pnv_npu_peers_create_table_userspace,
+	.set_window = pnv_npu_peers_set_window,
+	.unset_window = pnv_npu_peers_unset_window,
+	.take_ownership = pnv_npu_peers_take_ownership,
+	.release_ownership = pnv_npu_peers_release_ownership,
+};
+
+static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
+		struct pnv_ioda_pe *pe)
+{
+	if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
+		return;
+
+	npucomp->pe[npucomp->pe_num] = pe;
+	++npucomp->pe_num;
+}
+
+struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table_group *table_group;
+	struct npu_comp *npucomp;
+	struct pci_dev *gpdev = NULL;
+	struct pci_controller *hose;
+	struct pci_dev *npdev = NULL;
+
+	list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
+		npdev = pnv_pci_get_npu_dev(gpdev, 0);
+		if (npdev)
+			break;
+	}
+
+	if (!npdev)
+		/* It is not an NPU attached device, skip */
+		return NULL;
+
+	hose = pci_bus_to_host(npdev->bus);
+
+	if (hose->npu) {
+		table_group = &hose->npu->npucomp.table_group;
+
+		if (!table_group->group) {
+			table_group->ops = &pnv_npu_peers_ops;
+			iommu_register_group(table_group,
+					hose->global_number,
+					pe->pe_number);
+		}
+	} else {
+		/* Create a group for 1 GPU and attached NPUs for POWER8 */
+		pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
+		table_group = &pe->npucomp->table_group;
+		table_group->ops = &pnv_npu_peers_ops;
+		iommu_register_group(table_group, hose->global_number,
+				pe->pe_number);
+	}
+
+	/* Steal capabilities from a GPU PE */
+	table_group->max_dynamic_windows_supported =
+		pe->table_group.max_dynamic_windows_supported;
+	table_group->tce32_start = pe->table_group.tce32_start;
+	table_group->tce32_size = pe->table_group.tce32_size;
+	table_group->max_levels = pe->table_group.max_levels;
+	if (!table_group->pgsizes)
+		table_group->pgsizes = pe->table_group.pgsizes;
+
+	npucomp = container_of(table_group, struct npu_comp, table_group);
+	pnv_comp_attach_table_group(npucomp, pe);
+
+	return table_group;
+}
+
+struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table_group *table_group;
+	struct npu_comp *npucomp;
+	struct pci_dev *gpdev = NULL;
+	struct pci_dev *npdev;
+	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
+
+	WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
+	if (!gpe)
+		return NULL;
+
+	/*
+	 * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
+	 * but NPU bridges do not have this hook defined so we do it here.
+	 * We do not setup other table group parameters as they won't be used
+	 * anyway - NVLink bridges are subordinate PEs.
+	 */
+	pe->table_group.ops = &pnv_pci_npu_ops;
+
+	table_group = iommu_group_get_iommudata(
+			iommu_group_get(&gpdev->dev));
+
+	/*
+	 * On P9 NPU PHB and PCI PHB support different page sizes,
+	 * keep only matching. We expect here that NVLink bridge PE pgsizes
+	 * is initialized by the caller.
+	 */
+	table_group->pgsizes &= pe->table_group.pgsizes;
+	npucomp = container_of(table_group, struct npu_comp, table_group);
+	pnv_comp_attach_table_group(npucomp, pe);
+
+	list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
+		struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
+
+		if (gpdevtmp != gpdev)
+			continue;
+
+		iommu_add_device(table_group, &npdev->dev);
+	}
+
+	return table_group;
+}
+#endif /* CONFIG_IOMMU_API */
+
 
 /* Maximum number of nvlinks per npu */
 #define NV_MAX_LINKS 6
 
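pnv_npu_peers_set_window() and pnv_npu_peers_unset_window() above both follow an apply-to-all-with-rollback pattern: the operation is attempted on every peer PE in order, and if one fails, the peers already updated are unwound with the inverse operation. A standalone sketch of that control flow, with the per-PE callbacks mocked out (nothing here is kernel API):

	#include <stdio.h>

	#define MAX_PEERS 4

	/* Stand-ins for the per-PE set_window/unset_window callbacks. */
	static long set_one(int pe, int fail_at)
	{
		if (pe == fail_at)
			return -1;	/* simulate a failure on this peer */
		printf("set window on PE %d\n", pe);
		return 0;
	}

	static void unset_one(int pe)
	{
		printf("unset window on PE %d\n", pe);
	}

	/* Apply to all peers; on failure, undo the ones already done. */
	static long set_all(int npeers, int fail_at)
	{
		long ret = 0;
		int i, j;

		for (i = 0; i < npeers; ++i) {
			ret = set_one(i, fail_at);
			if (ret)
				break;
		}

		if (ret)
			for (j = 0; j < i; ++j)
				unset_one(j);

		return ret;
	}

	int main(void)
	{
		set_all(MAX_PEERS, 2);	/* PEs 0 and 1 are set, then rolled back */
		return 0;
	}
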
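The pgsizes handling in pnv_npu_compound_attach() is a plain bitmask intersection: each bit in pgsizes marks a supported page size, and ANDing the GPU PHB's mask with the NVLink bridge PE's mask keeps only the sizes both sides support. A tiny sketch with hypothetical capability masks:

	#include <stdio.h>

	#define SZ_4K	0x1000UL
	#define SZ_64K	0x10000UL
	#define SZ_16M	0x1000000UL

	int main(void)
	{
		/* Hypothetical capability masks for the two PHBs. */
		unsigned long pci_pgsizes = SZ_4K | SZ_64K | SZ_16M;
		unsigned long npu_pgsizes = SZ_4K | SZ_64K;

		/* Keep only page sizes both sides support, as the patch does. */
		unsigned long common = pci_pgsizes & npu_pgsizes;

		printf("common pgsizes mask: 0x%lx\n", common);	/* 0x11000 */
		return 0;
	}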