Skip to content

Commit 9345005

Browse files
praritIngo Molnar
authored andcommitted
x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs
During heavy CPU-hotplug operations the following spurious kernel warnings can trigger: do_IRQ: No ... irq handler for vector (irq -1) [ See: https://bugzilla.kernel.org/show_bug.cgi?id=64831 ] When downing a cpu it is possible that there are unhandled irqs left in the APIC IRR register. The following code path shows how the problem can occur: 1. CPU 5 is to go down. 2. cpu_disable() on CPU 5 executes with interrupt flag cleared by local_irq_save() via stop_machine(). 3. IRQ 12 asserts on CPU 5, setting IRR but not ISR because interrupt flag is cleared (CPU unabled to handle the irq) 4. IRQs are migrated off of CPU 5, and the vectors' irqs are set to -1. 5. stop_machine() finishes cpu_disable() 6. cpu_die() for CPU 5 executes in normal context. 7. CPU 5 attempts to handle IRQ 12 because the IRR is set for IRQ 12. The code attempts to find the vector's IRQ and cannot because it has been set to -1. 8. do_IRQ() warning displays warning about CPU 5 IRQ 12. I added a debug printk to output which CPU & vector was retriggered and discovered that that we are getting bogus events. I see a 100% correlation between this debug printk in fixup_irqs() and the do_IRQ() warning. This patchset resolves this by adding definitions for VECTOR_UNDEFINED(-1) and VECTOR_RETRIGGERED(-2) and modifying the code to use them. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=64831 Signed-off-by: Prarit Bhargava <prarit@redhat.com> Reviewed-by: Rui Wang <rui.y.wang@intel.com> Cc: Michel Lespinasse <walken@google.com> Cc: Seiji Aguchi <seiji.aguchi@hds.com> Cc: Yang Zhang <yang.z.zhang@Intel.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: janet.morgan@Intel.com Cc: tony.luck@Intel.com Cc: ruiv.wang@gmail.com Link: http://lkml.kernel.org/r/1388938252-16627-1-git-send-email-prarit@redhat.com [ Cleaned up the code a bit. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 228fdc0 commit 9345005

File tree

4 files changed

+27
-17
lines changed

4 files changed

+27
-17
lines changed

arch/x86/include/asm/hw_irq.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
191191
#define trace_interrupt interrupt
192192
#endif
193193

194+
#define VECTOR_UNDEFINED -1
195+
#define VECTOR_RETRIGGERED -2
196+
194197
typedef int vector_irq_t[NR_VECTORS];
195198
DECLARE_PER_CPU(vector_irq_t, vector_irq);
196199
extern void setup_vector_irq(int cpu);

arch/x86/kernel/apic/io_apic.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,9 +1142,10 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
11421142
if (test_bit(vector, used_vectors))
11431143
goto next;
11441144

1145-
for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1146-
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1145+
for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
1146+
if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED)
11471147
goto next;
1148+
}
11481149
/* Found one! */
11491150
current_vector = vector;
11501151
current_offset = offset;
@@ -1183,19 +1184,18 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
11831184

11841185
vector = cfg->vector;
11851186
for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1186-
per_cpu(vector_irq, cpu)[vector] = -1;
1187+
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
11871188

11881189
cfg->vector = 0;
11891190
cpumask_clear(cfg->domain);
11901191

11911192
if (likely(!cfg->move_in_progress))
11921193
return;
11931194
for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1194-
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1195-
vector++) {
1195+
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
11961196
if (per_cpu(vector_irq, cpu)[vector] != irq)
11971197
continue;
1198-
per_cpu(vector_irq, cpu)[vector] = -1;
1198+
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
11991199
break;
12001200
}
12011201
}
@@ -1228,12 +1228,12 @@ void __setup_vector_irq(int cpu)
12281228
/* Mark the free vectors */
12291229
for (vector = 0; vector < NR_VECTORS; ++vector) {
12301230
irq = per_cpu(vector_irq, cpu)[vector];
1231-
if (irq < 0)
1231+
if (irq <= VECTOR_UNDEFINED)
12321232
continue;
12331233

12341234
cfg = irq_cfg(irq);
12351235
if (!cpumask_test_cpu(cpu, cfg->domain))
1236-
per_cpu(vector_irq, cpu)[vector] = -1;
1236+
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
12371237
}
12381238
raw_spin_unlock(&vector_lock);
12391239
}
@@ -2208,7 +2208,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
22082208
struct irq_cfg *cfg;
22092209
irq = __this_cpu_read(vector_irq[vector]);
22102210

2211-
if (irq == -1)
2211+
if (irq <= VECTOR_UNDEFINED)
22122212
continue;
22132213

22142214
desc = irq_to_desc(irq);

arch/x86/kernel/irq.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,13 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
193193
if (!handle_irq(irq, regs)) {
194194
ack_APIC_irq();
195195

196-
if (printk_ratelimit())
197-
pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
198-
__func__, smp_processor_id(), vector, irq);
196+
if (irq != VECTOR_RETRIGGERED) {
197+
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
198+
__func__, smp_processor_id(),
199+
vector, irq);
200+
} else {
201+
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
202+
}
199203
}
200204

201205
irq_exit();
@@ -344,7 +348,7 @@ void fixup_irqs(void)
344348
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
345349
unsigned int irr;
346350

347-
if (__this_cpu_read(vector_irq[vector]) < 0)
351+
if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED)
348352
continue;
349353

350354
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@@ -355,11 +359,14 @@ void fixup_irqs(void)
355359
data = irq_desc_get_irq_data(desc);
356360
chip = irq_data_get_irq_chip(data);
357361
raw_spin_lock(&desc->lock);
358-
if (chip->irq_retrigger)
362+
if (chip->irq_retrigger) {
359363
chip->irq_retrigger(data);
364+
__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
365+
}
360366
raw_spin_unlock(&desc->lock);
361367
}
362-
__this_cpu_write(vector_irq[vector], -1);
368+
if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
369+
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
363370
}
364371
}
365372
#endif

arch/x86/kernel/irqinit.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ static struct irqaction irq2 = {
5252
};
5353

5454
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
55-
[0 ... NR_VECTORS - 1] = -1,
55+
[0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED,
5656
};
5757

5858
int vector_used_by_percpu_irq(unsigned int vector)
5959
{
6060
int cpu;
6161

6262
for_each_online_cpu(cpu) {
63-
if (per_cpu(vector_irq, cpu)[vector] != -1)
63+
if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED)
6464
return 1;
6565
}
6666

0 commit comments

Comments
 (0)