Skip to content

Commit 4900be8

Browse files
committed
x86/vector/msi: Switch to global reservation mode
Devices with many queues allocate a huge number of interrupts and get assigned a vector for each of them, even if the queues are not active and the interrupts are never requested.

This causes problems with the decision whether the global vector space is sufficient for CPU hot unplug operations.

Change it to a reservation scheme, which allows overcommitment.

When the interrupt is allocated and initialized, the vector assignment merely updates the reservation request counter in the matrix allocator. This counter is used to emit warnings when the reservation exceeds the available vector space, but does not affect CPU offline operations. Like the managed interrupts, the corresponding MSI/DMAR/IOAPIC entries are directed to the special shutdown vector.

When the interrupt is requested, the activation code tries to assign a real vector. If that succeeds, the interrupt is started up and functional. If that fails, then subsequently request_irq() fails with -ENOSPC.

This allows a clear separation of inactive and active modes and simplifies the final decision whether the global vector space is sufficient for CPU offline operations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Tested-by: Yu Chen <yu.c.chen@intel.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Rui Zhang <rui.zhang@intel.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Len Brown <lenb@kernel.org>
Link: https://lkml.kernel.org/r/20170913213156.184211133@linutronix.de
1 parent 2db1f95 commit 4900be8

File tree

1 file changed

+63
-34
lines changed

1 file changed

+63
-34
lines changed

arch/x86/kernel/apic/vector.c

Lines changed: 63 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ struct apic_chip_data {
3333
unsigned int irq;
3434
struct hlist_node clist;
3535
unsigned int move_in_progress : 1,
36-
is_managed : 1;
36+
is_managed : 1,
37+
can_reserve : 1,
38+
has_reserved : 1;
3739
};
3840

3941
struct irq_domain *x86_vector_domain;
@@ -175,9 +177,31 @@ static int reserve_managed_vector(struct irq_data *irqd)
175177
return ret;
176178
}
177179

180+
static void reserve_irq_vector_locked(struct irq_data *irqd)
181+
{
182+
struct apic_chip_data *apicd = apic_chip_data(irqd);
183+
184+
irq_matrix_reserve(vector_matrix);
185+
apicd->can_reserve = true;
186+
apicd->has_reserved = true;
187+
trace_vector_reserve(irqd->irq, 0);
188+
vector_assign_managed_shutdown(irqd);
189+
}
190+
191+
static int reserve_irq_vector(struct irq_data *irqd)
192+
{
193+
unsigned long flags;
194+
195+
raw_spin_lock_irqsave(&vector_lock, flags);
196+
reserve_irq_vector_locked(irqd);
197+
raw_spin_unlock_irqrestore(&vector_lock, flags);
198+
return 0;
199+
}
200+
178201
static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
179202
{
180203
struct apic_chip_data *apicd = apic_chip_data(irqd);
204+
bool resvd = apicd->has_reserved;
181205
unsigned int cpu = apicd->cpu;
182206
int vector = apicd->vector;
183207

@@ -191,10 +215,10 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
191215
if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
192216
return 0;
193217

194-
vector = irq_matrix_alloc(vector_matrix, dest, false, &cpu);
218+
vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
195219
if (vector > 0)
196220
apic_update_vector(irqd, vector, cpu);
197-
trace_vector_alloc(irqd->irq, vector, false, vector);
221+
trace_vector_alloc(irqd->irq, vector, resvd, vector);
198222
return vector;
199223
}
200224

@@ -252,7 +276,11 @@ assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
252276
return reserve_managed_vector(irqd);
253277
if (info->mask)
254278
return assign_irq_vector(irqd, info->mask);
255-
return assign_irq_vector_any(irqd);
279+
if (info->type != X86_IRQ_ALLOC_TYPE_MSI &&
280+
info->type != X86_IRQ_ALLOC_TYPE_MSIX)
281+
return assign_irq_vector_any(irqd);
282+
/* For MSI(X) make only a global reservation with no guarantee */
283+
return reserve_irq_vector(irqd);
256284
}
257285

258286
static int
@@ -314,17 +342,35 @@ static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd)
314342
unsigned long flags;
315343

316344
trace_vector_deactivate(irqd->irq, apicd->is_managed,
317-
false, false);
345+
apicd->can_reserve, false);
318346

319-
if (apicd->is_managed)
347+
/* Regular fixed assigned interrupt */
348+
if (!apicd->is_managed && !apicd->can_reserve)
349+
return;
350+
/* If the interrupt has a global reservation, nothing to do */
351+
if (apicd->has_reserved)
320352
return;
321353

322354
raw_spin_lock_irqsave(&vector_lock, flags);
323355
clear_irq_vector(irqd);
324-
vector_assign_managed_shutdown(irqd);
356+
if (apicd->can_reserve)
357+
reserve_irq_vector_locked(irqd);
358+
else
359+
vector_assign_managed_shutdown(irqd);
325360
raw_spin_unlock_irqrestore(&vector_lock, flags);
326361
}
327362

363+
static int activate_reserved(struct irq_data *irqd)
364+
{
365+
struct apic_chip_data *apicd = apic_chip_data(irqd);
366+
int ret;
367+
368+
ret = assign_irq_vector_any_locked(irqd);
369+
if (!ret)
370+
apicd->has_reserved = false;
371+
return ret;
372+
}
373+
328374
static int activate_managed(struct irq_data *irqd)
329375
{
330376
const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
@@ -357,16 +403,19 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
357403
int ret = 0;
358404

359405
trace_vector_activate(irqd->irq, apicd->is_managed,
360-
false, early);
406+
apicd->can_reserve, early);
361407

362-
if (!apicd->is_managed)
408+
/* Nothing to do for fixed assigned vectors */
409+
if (!apicd->can_reserve && !apicd->is_managed)
363410
return 0;
364411

365412
raw_spin_lock_irqsave(&vector_lock, flags);
366413
if (early || irqd_is_managed_and_shutdown(irqd))
367414
vector_assign_managed_shutdown(irqd);
368-
else
415+
else if (apicd->is_managed)
369416
ret = activate_managed(irqd);
417+
else if (apicd->has_reserved)
418+
ret = activate_reserved(irqd);
370419
raw_spin_unlock_irqrestore(&vector_lock, flags);
371420
return ret;
372421
}
@@ -376,8 +425,11 @@ static void vector_free_reserved_and_managed(struct irq_data *irqd)
376425
const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
377426
struct apic_chip_data *apicd = apic_chip_data(irqd);
378427

379-
trace_vector_teardown(irqd->irq, apicd->is_managed, false);
428+
trace_vector_teardown(irqd->irq, apicd->is_managed,
429+
apicd->has_reserved);
380430

431+
if (apicd->has_reserved)
432+
irq_matrix_remove_reserved(vector_matrix);
381433
if (apicd->is_managed)
382434
irq_matrix_remove_managed(vector_matrix, dest);
383435
}
@@ -604,22 +656,6 @@ int __init arch_early_irq_init(void)
604656
}
605657

606658
#ifdef CONFIG_SMP
607-
/* Temporary hack to keep things working */
608-
static void vector_update_shutdown_irqs(void)
609-
{
610-
struct irq_desc *desc;
611-
int irq;
612-
613-
for_each_irq_desc(irq, desc) {
614-
struct irq_data *irqd = irq_desc_get_irq_data(desc);
615-
struct apic_chip_data *ad = apic_chip_data(irqd);
616-
617-
if (!ad || !ad->vector || ad->cpu != smp_processor_id())
618-
continue;
619-
this_cpu_write(vector_irq[ad->vector], desc);
620-
irq_matrix_assign(vector_matrix, ad->vector);
621-
}
622-
}
623659

624660
static struct irq_desc *__setup_vector_irq(int vector)
625661
{
@@ -655,13 +691,6 @@ void lapic_online(void)
655691
*/
656692
for (vector = 0; vector < NR_VECTORS; vector++)
657693
this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
658-
659-
/*
660-
* Until the rewrite of the managed interrupt management is in
661-
* place it's necessary to walk the irq descriptors and check for
662-
* interrupts which are targeted at this CPU.
663-
*/
664-
vector_update_shutdown_irqs();
665694
}
666695

667696
void lapic_offline(void)

0 commit comments

Comments
 (0)