@@ -416,13 +416,14 @@ static void its_send_single_command(struct its_node *its,
 {
 	struct its_cmd_block *cmd, *sync_cmd, *next_cmd;
 	struct its_collection *sync_col;
+	unsigned long flags;
 
-	raw_spin_lock(&its->lock);
+	raw_spin_lock_irqsave(&its->lock, flags);
 
 	cmd = its_allocate_entry(its);
 	if (!cmd) {		/* We're soooooo screewed... */
 		pr_err_ratelimited("ITS can't allocate, dropping command\n");
-		raw_spin_unlock(&its->lock);
+		raw_spin_unlock_irqrestore(&its->lock, flags);
 		return;
 	}
 	sync_col = builder(cmd, desc);
@@ -442,7 +443,7 @@ static void its_send_single_command(struct its_node *its,
 
 post:
 	next_cmd = its_post_commands(its);
-	raw_spin_unlock(&its->lock);
+	raw_spin_unlock_irqrestore(&its->lock, flags);
 
 	its_wait_for_range_completion(its, cmd, next_cmd);
 }
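
Note: the plain raw_spin_lock() calls above become the irqsave/irqrestore forms because its->lock can also be taken from contexts running with interrupts disabled (the exact call chains are not shown in this excerpt). A minimal sketch of the hazard the change closes, with hypothetical names:

static DEFINE_RAW_SPINLOCK(demo_lock);

static void demo_task_context_path(void)
{
	unsigned long flags;

	/* If an interrupt arriving here can re-enter a path that also
	 * takes demo_lock, holding it with interrupts enabled deadlocks:
	 * the CPU spins forever on a lock it already owns. Masking
	 * interrupts across the critical section closes that window.
	 */
	raw_spin_lock_irqsave(&demo_lock, flags);
	/* ... build and post an ITS command ... */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}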
@@ -799,21 +800,43 @@ static int its_alloc_tables(struct its_node *its)
 {
 	int err;
 	int i;
-	int psz = PAGE_SIZE;
+	int psz = SZ_64K;
 	u64 shr = GITS_BASER_InnerShareable;
 
 	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
 		u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
 		u64 type = GITS_BASER_TYPE(val);
 		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
+		int order = get_order(psz);
+		int alloc_size;
 		u64 tmp;
 		void *base;
 
 		if (type == GITS_BASER_TYPE_NONE)
 			continue;
 
-		/* We're lazy and only allocate a single page for now */
-		base = (void *)get_zeroed_page(GFP_KERNEL);
+		/*
+		 * Allocate as many entries as required to fit the
+		 * range of device IDs that the ITS can grok... The ID
+		 * space being incredibly sparse, this results in a
+		 * massive waste of memory.
+		 *
+		 * For other tables, only allocate a single page.
+		 */
+		if (type == GITS_BASER_TYPE_DEVICE) {
+			u64 typer = readq_relaxed(its->base + GITS_TYPER);
+			u32 ids = GITS_TYPER_DEVBITS(typer);
+
+			order = get_order((1UL << ids) * entry_size);
+			if (order >= MAX_ORDER) {
+				order = MAX_ORDER - 1;
+				pr_warn("%s: Device Table too large, reduce its page order to %u\n",
+					its->msi_chip.of_node->full_name, order);
+			}
+		}
+
+		alloc_size = (1 << order) * PAGE_SIZE;
+		base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 		if (!base) {
 			err = -ENOMEM;
 			goto out_free;
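
The device-table sizing above, worked through with illustrative numbers (DEVBITS and the entry size vary by implementation; this helper is a sketch, not part of the patch):

static int demo_device_table_order(u64 typer, u64 entry_size)
{
	u32 ids = GITS_TYPER_DEVBITS(typer);	/* e.g. 17 ID bits */

	/* DEVBITS = 17, entry_size = 8: (1UL << 17) * 8 = 1 MiB,
	 * i.e. order 8 with 4K pages (256 contiguous pages). The
	 * MAX_ORDER clamp only matters for very wide ID spaces,
	 * where a flat table would exceed the buddy allocator's
	 * largest contiguous allocation.
	 */
	return min_t(int, get_order((1UL << ids) * entry_size),
		     MAX_ORDER - 1);
}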
@@ -841,7 +864,7 @@ static int its_alloc_tables(struct its_node *its)
 			break;
 		}
 
-		val |= (PAGE_SIZE / psz) - 1;
+		val |= (alloc_size / psz) - 1;
 
 		writeq_relaxed(val, its->base + GITS_BASER + i * 8);
 		tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
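
For context (not part of the diff): the low bits of GITS_BASER programmed here hold the number of psz-sized pages minus one, so the field must track the real allocation now that it can exceed one page. Illustrative sketch:

static u64 demo_baser_size_field(int alloc_size, int psz)
{
	/* e.g. alloc_size = SZ_1M, psz = SZ_64K: 16 pages -> field 15.
	 * The old PAGE_SIZE-based value was only correct while exactly
	 * one page was ever allocated.
	 */
	return (alloc_size / psz) - 1;
}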
@@ -882,7 +905,7 @@ static int its_alloc_tables(struct its_node *its)
 		}
 
 		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-			(int)(PAGE_SIZE / entry_size),
+			(int)(alloc_size / entry_size),
 			its_base_type_string[type],
 			(unsigned long)virt_to_phys(base),
 			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
@@ -1020,8 +1043,9 @@ static void its_cpu_init_collection(void)
 static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
 {
 	struct its_device *its_dev = NULL, *tmp;
+	unsigned long flags;
 
-	raw_spin_lock(&its->lock);
+	raw_spin_lock_irqsave(&its->lock, flags);
 
 	list_for_each_entry(tmp, &its->its_device_list, entry) {
 		if (tmp->device_id == dev_id) {
@@ -1030,7 +1054,7 @@ static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
 		}
 	}
 
-	raw_spin_unlock(&its->lock);
+	raw_spin_unlock_irqrestore(&its->lock, flags);
 
 	return its_dev;
 }
@@ -1040,6 +1064,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 {
 	struct its_device *dev;
 	unsigned long *lpi_map;
+	unsigned long flags;
 	void *itt;
 	int lpi_base;
 	int nr_lpis;
@@ -1056,7 +1081,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 	nr_ites = max(2UL, roundup_pow_of_two(nvecs));
 	sz = nr_ites * its->ite_size;
 	sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
-	itt = kmalloc(sz, GFP_KERNEL);
+	itt = kzalloc(sz, GFP_KERNEL);
 	lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis);
 
 	if (!dev || !itt || !lpi_map) {
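
The ITT sizing just above, traced with illustrative numbers; the switch to kzalloc() additionally guarantees the hardware never walks uninitialized interrupt translation entries. A sketch, assuming ITS_ITT_ALIGN is 256 bytes as in this driver:

static size_t demo_itt_bytes(int nvecs, size_t ite_size)
{
	/* nvecs = 3, ite_size = 8: nr_ites = max(2, 4) = 4, sz = 32,
	 * padded to max(32, 256) + 255 = 511 bytes so the table start
	 * can later be rounded up to a 256-byte boundary by hand.
	 */
	unsigned long nr_ites = max(2UL, roundup_pow_of_two(nvecs));
	size_t sz = nr_ites * ite_size;

	return max_t(size_t, sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
}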
@@ -1075,9 +1100,9 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 	dev->device_id = dev_id;
 	INIT_LIST_HEAD(&dev->entry);
 
-	raw_spin_lock(&its->lock);
+	raw_spin_lock_irqsave(&its->lock, flags);
 	list_add(&dev->entry, &its->its_device_list);
-	raw_spin_unlock(&its->lock);
+	raw_spin_unlock_irqrestore(&its->lock, flags);
 
 	/* Bind the device to the first possible CPU */
 	cpu = cpumask_first(cpu_online_mask);
@@ -1091,9 +1116,11 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 
 static void its_free_device(struct its_device *its_dev)
 {
-	raw_spin_lock(&its_dev->its->lock);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&its_dev->its->lock, flags);
 	list_del(&its_dev->entry);
-	raw_spin_unlock(&its_dev->its->lock);
+	raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
 	kfree(its_dev->itt);
 	kfree(its_dev);
 }
@@ -1112,31 +1139,69 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq)
 	return 0;
 }
 
+struct its_pci_alias {
+	struct pci_dev	*pdev;
+	u32		dev_id;
+	u32		count;
+};
+
+static int its_pci_msi_vec_count(struct pci_dev *pdev)
+{
+	int msi, msix;
+
+	msi = max(pci_msi_vec_count(pdev), 0);
+	msix = max(pci_msix_vec_count(pdev), 0);
+
+	return max(msi, msix);
+}
+
+static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+	struct its_pci_alias *dev_alias = data;
+
+	dev_alias->dev_id = alias;
+	if (pdev != dev_alias->pdev)
+		dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev);
+
+	return 0;
+}
+
 static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
 			   int nvec, msi_alloc_info_t *info)
 {
 	struct pci_dev *pdev;
 	struct its_node *its;
-	u32 dev_id;
 	struct its_device *its_dev;
+	struct its_pci_alias dev_alias;
 
 	if (!dev_is_pci(dev))
 		return -EINVAL;
 
 	pdev = to_pci_dev(dev);
-	dev_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
+	dev_alias.pdev = pdev;
+	dev_alias.count = nvec;
+
+	pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);
 	its = domain->parent->host_data;
 
-	its_dev = its_find_device(its, dev_id);
-	if (WARN_ON(its_dev))
-		return -EINVAL;
+	its_dev = its_find_device(its, dev_alias.dev_id);
+	if (its_dev) {
+		/*
+		 * We already have seen this ID, probably through
+		 * another alias (PCI bridge of some sort). No need to
+		 * create the device.
+		 */
+		dev_dbg(dev, "Reusing ITT for devID %x\n", dev_alias.dev_id);
+		goto out;
+	}
 
-	its_dev = its_create_device(its, dev_id, nvec);
+	its_dev = its_create_device(its, dev_alias.dev_id, dev_alias.count);
 	if (!its_dev)
 		return -ENOMEM;
 
-	dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n", nvec, ilog2(nvec));
-
+	dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n",
+		dev_alias.count, ilog2(dev_alias.count));
+out:
 	info->scratchpad[0].ptr = its_dev;
 	info->scratchpad[1].ptr = dev;
 	return 0;
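
Why the alias walk matters: a device behind a PCIe-to-PCI bridge issues DMA, and therefore MSIs, with the bridge's requester ID, so that alias rather than the device's own bus/devfn is the DeviceID the ITS actually sees. A usage sketch of the helpers above (the wrapper name is hypothetical):

static u32 demo_resolve_its_dev_id(struct pci_dev *pdev, int nvec)
{
	struct its_pci_alias dev_alias = {
		.pdev	= pdev,
		.count	= nvec,
	};

	/* Visits pdev and every alias up to the topmost bridge; the
	 * last alias seen wins as dev_id, and count grows for each
	 * extra alias so the shared ITT is sized for all functions
	 * that end up behind the same DeviceID.
	 */
	pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);

	return dev_alias.dev_id;
}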
@@ -1255,6 +1320,34 @@ static const struct irq_domain_ops its_domain_ops = {
 	.deactivate	= its_irq_domain_deactivate,
 };
 
+static int its_force_quiescent(void __iomem *base)
+{
+	u32 count = 1000000;	/* 1s */
+	u32 val;
+
+	val = readl_relaxed(base + GITS_CTLR);
+	if (val & GITS_CTLR_QUIESCENT)
+		return 0;
+
+	/* Disable the generation of all interrupts to this ITS */
+	val &= ~GITS_CTLR_ENABLE;
+	writel_relaxed(val, base + GITS_CTLR);
+
+	/* Poll GITS_CTLR and wait until ITS becomes quiescent */
+	while (1) {
+		val = readl_relaxed(base + GITS_CTLR);
+		if (val & GITS_CTLR_QUIESCENT)
+			return 0;
+
+		count--;
+		if (!count)
+			return -EBUSY;
+
+		cpu_relax();
+		udelay(1);
+	}
+}
+
 static int its_probe(struct device_node *node, struct irq_domain *parent)
 {
 	struct resource res;
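
The loop above is the classic bounded-poll pattern: cap the wait (a million udelay(1) iterations, roughly one second) instead of spinning forever on hardware that never settles. Later kernels ship a generic helper in <linux/iopoll.h> that captures the same shape; a sketch for comparison only, since the driver keeps its own loop:

#include <linux/iopoll.h>

static int demo_force_quiescent(void __iomem *base)
{
	u32 val = readl_relaxed(base + GITS_CTLR);

	/* Stop the ITS generating interrupts, then poll for QUIESCENT
	 * every 1us, giving up after 1s, as the open-coded loop does.
	 */
	writel_relaxed(val & ~GITS_CTLR_ENABLE, base + GITS_CTLR);

	return readl_poll_timeout_atomic(base + GITS_CTLR, val,
					 val & GITS_CTLR_QUIESCENT,
					 1, USEC_PER_SEC);
}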
@@ -1283,6 +1376,13 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
 		goto out_unmap;
 	}
 
+	err = its_force_quiescent(its_base);
+	if (err) {
+		pr_warn("%s: failed to quiesce, giving up\n",
+			node->full_name);
+		goto out_unmap;
+	}
+
 	pr_info("ITS: %s\n", node->full_name);
 
 	its = kzalloc(sizeof(*its), GFP_KERNEL);
@@ -1323,7 +1423,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
 	writeq_relaxed(baser, its->base + GITS_CBASER);
 	tmp = readq_relaxed(its->base + GITS_CBASER);
 	writeq_relaxed(0, its->base + GITS_CWRITER);
-	writel_relaxed(1, its->base + GITS_CTLR);
+	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
 
 	if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) {
 		pr_info("ITS: using cache flushing for cmd queue\n");
@@ -1382,12 +1482,11 @@ static bool gic_rdists_supports_plpis(void)
 
 int its_cpu_init(void)
 {
-	if (!gic_rdists_supports_plpis()) {
-		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
-		return -ENXIO;
-	}
-
 	if (!list_empty(&its_nodes)) {
+		if (!gic_rdists_supports_plpis()) {
+			pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
+			return -ENXIO;
+		}
 		its_cpu_init_lpis();
 		its_cpu_init_collection();
 	}