Skip to content

Commit 4ddf2a1

Browse files
author
Ingo Molnar
committed
Merge tag 'ras_for_4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras
Pull RAS updates from Borislav Petkov: - RAS: Add support for deferred errors on AMD (Aravind Gopalakrishnan) This is an important RAS feature which adds hardware support for poisoned data. That means roughly that the hardware marks data which it has detected as corrupted but wasn't able to correct, as poisoned data and raises an APIC interrupt to signal that in the form of a deferred error. It is the OS's responsibility then to take proper recovery action and thus prolong system lifetime as far as possible. - Misc cleanups on top. (Borislav Petkov) Signed-off-by: Ingo Molnar <mingo@kernel.org>
2 parents 030bbdb + 3490c0e commit 4ddf2a1

File tree

13 files changed

+182
-42
lines changed

13 files changed

+182
-42
lines changed

arch/x86/include/asm/entry_arch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
5050
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
5151
#endif
5252

53+
#ifdef CONFIG_X86_MCE_AMD
54+
BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
55+
#endif
5356
#endif

arch/x86/include/asm/hardirq.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ typedef struct {
3333
#ifdef CONFIG_X86_MCE_THRESHOLD
3434
unsigned int irq_threshold_count;
3535
#endif
36+
#ifdef CONFIG_X86_MCE_AMD
37+
unsigned int irq_deferred_error_count;
38+
#endif
3639
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
3740
unsigned int irq_hv_callback_count;
3841
#endif

arch/x86/include/asm/hw_irq.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ extern asmlinkage void invalidate_interrupt31(void);
7373
extern asmlinkage void irq_move_cleanup_interrupt(void);
7474
extern asmlinkage void reboot_interrupt(void);
7575
extern asmlinkage void threshold_interrupt(void);
76+
extern asmlinkage void deferred_error_interrupt(void);
7677

7778
extern asmlinkage void call_function_interrupt(void);
7879
extern asmlinkage void call_function_single_interrupt(void);
@@ -87,6 +88,7 @@ extern void trace_spurious_interrupt(void);
8788
extern void trace_thermal_interrupt(void);
8889
extern void trace_reschedule_interrupt(void);
8990
extern void trace_threshold_interrupt(void);
91+
extern void trace_deferred_error_interrupt(void);
9092
extern void trace_call_function_interrupt(void);
9193
extern void trace_call_function_single_interrupt(void);
9294
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt

arch/x86/include/asm/irq_vectors.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,21 +102,22 @@
102102
*/
103103
#define X86_PLATFORM_IPI_VECTOR 0xf7
104104

105-
/* Vector for KVM to deliver posted interrupt IPI */
106-
#ifdef CONFIG_HAVE_KVM
107-
#define POSTED_INTR_VECTOR 0xf2
108-
#endif
109-
110105
/*
111106
* IRQ work vector:
112107
*/
113108
#define IRQ_WORK_VECTOR 0xf6
114109

115110
#define UV_BAU_MESSAGE 0xf5
111+
#define DEFERRED_ERROR_VECTOR 0xf4
116112

117113
/* Vector on which hypervisor callbacks will be delivered */
118114
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
119115

116+
/* Vector for KVM to deliver posted interrupt IPI */
117+
#ifdef CONFIG_HAVE_KVM
118+
#define POSTED_INTR_VECTOR 0xf2
119+
#endif
120+
120121
/*
121122
* Local APIC timer IRQ vector is on a different priority level,
122123
* to work around the 'lost local interrupt if more than 2 IRQ

arch/x86/include/asm/mce.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,19 @@ struct mca_config {
117117
};
118118

119119
struct mce_vendor_flags {
120-
__u64 overflow_recov : 1, /* cpuid_ebx(80000007) */
121-
__reserved_0 : 63;
120+
/*
121+
* overflow recovery cpuid bit indicates that overflow
122+
* conditions are not fatal
123+
*/
124+
__u64 overflow_recov : 1,
125+
126+
/*
127+
* SUCCOR stands for S/W UnCorrectable error COntainment
128+
* and Recovery. It indicates support for data poisoning
129+
* in HW and deferred error interrupts.
130+
*/
131+
succor : 1,
132+
__reserved_0 : 62;
122133
};
123134
extern struct mce_vendor_flags mce_flags;
124135

@@ -223,6 +234,9 @@ void do_machine_check(struct pt_regs *, long);
223234
extern void (*mce_threshold_vector)(void);
224235
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
225236

237+
/* Deferred error interrupt handler */
238+
extern void (*deferred_error_int_vector)(void);
239+
226240
/*
227241
* Thermal handler
228242
*/

arch/x86/include/asm/trace/irq_vectors.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single);
100100
*/
101101
DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
102102

103+
/*
104+
* deferred_error_apic - called when entering/exiting a deferred apic interrupt
105+
* vector handler
106+
*/
107+
DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
108+
103109
/*
104110
* thermal_apic - called when entering/exiting a thermal apic interrupt
105111
* vector handler

arch/x86/include/asm/traps.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi;
108108
void math_emulate(struct math_emu_info *);
109109
#ifndef CONFIG_X86_32
110110
asmlinkage void smp_thermal_interrupt(void);
111-
asmlinkage void mce_threshold_interrupt(void);
111+
asmlinkage void smp_threshold_interrupt(void);
112+
asmlinkage void smp_deferred_error_interrupt(void);
112113
#endif
113114

114115
extern enum ctx_state ist_enter(struct pt_regs *regs);

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1637,10 +1637,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
16371637
mce_intel_feature_init(c);
16381638
mce_adjust_timer = cmci_intel_adjust_timer;
16391639
break;
1640-
case X86_VENDOR_AMD:
1640+
1641+
case X86_VENDOR_AMD: {
1642+
u32 ebx = cpuid_ebx(0x80000007);
1643+
16411644
mce_amd_feature_init(c);
1642-
mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
1645+
mce_flags.overflow_recov = !!(ebx & BIT(0));
1646+
mce_flags.succor = !!(ebx & BIT(1));
16431647
break;
1648+
}
1649+
16441650
default:
16451651
break;
16461652
}

arch/x86/kernel/cpu/mcheck/mce_amd.c

Lines changed: 121 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,13 @@
11
/*
2-
* (c) 2005-2012 Advanced Micro Devices, Inc.
2+
* (c) 2005-2015 Advanced Micro Devices, Inc.
33
* Your use of this code is subject to the terms and conditions of the
44
* GNU general public license version 2. See "COPYING" or
55
* http://www.gnu.org/licenses/gpl.html
66
*
77
* Written by Jacob Shin - AMD, Inc.
8-
*
98
* Maintained by: Borislav Petkov <bp@alien8.de>
109
*
11-
* April 2006
12-
* - added support for AMD Family 0x10 processors
13-
* May 2012
14-
* - major scrubbing
15-
*
16-
* All MC4_MISCi registers are shared between multi-cores
10+
* All MC4_MISCi registers are shared between cores on a node.
1711
*/
1812
#include <linux/interrupt.h>
1913
#include <linux/notifier.h>
@@ -32,6 +26,7 @@
3226
#include <asm/idle.h>
3327
#include <asm/mce.h>
3428
#include <asm/msr.h>
29+
#include <asm/trace/irq_vectors.h>
3530

3631
#define NR_BLOCKS 9
3732
#define THRESHOLD_MAX 0xFFF
@@ -47,6 +42,13 @@
4742
#define MASK_BLKPTR_LO 0xFF000000
4843
#define MCG_XBLK_ADDR 0xC0000400
4944

45+
/* Deferred error settings */
46+
#define MSR_CU_DEF_ERR 0xC0000410
47+
#define MASK_DEF_LVTOFF 0x000000F0
48+
#define MASK_DEF_INT_TYPE 0x00000006
49+
#define DEF_LVT_OFF 0x2
50+
#define DEF_INT_TYPE_APIC 0x2
51+
5052
static const char * const th_names[] = {
5153
"load_store",
5254
"insn_fetch",
@@ -60,6 +62,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
6062
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
6163

6264
static void amd_threshold_interrupt(void);
65+
static void amd_deferred_error_interrupt(void);
66+
67+
static void default_deferred_error_interrupt(void)
68+
{
69+
pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
70+
}
71+
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
6372

6473
/*
6574
* CPU Initialization
@@ -196,7 +205,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
196205
threshold_restart_bank(&tr);
197206
};
198207

199-
static int setup_APIC_mce(int reserved, int new)
208+
static int setup_APIC_mce_threshold(int reserved, int new)
200209
{
201210
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
202211
APIC_EILVT_MSG_FIX, 0))
@@ -205,6 +214,39 @@ static int setup_APIC_mce(int reserved, int new)
205214
return reserved;
206215
}
207216

217+
static int setup_APIC_deferred_error(int reserved, int new)
218+
{
219+
if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
220+
APIC_EILVT_MSG_FIX, 0))
221+
return new;
222+
223+
return reserved;
224+
}
225+
226+
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
227+
{
228+
u32 low = 0, high = 0;
229+
int def_offset = -1, def_new;
230+
231+
if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
232+
return;
233+
234+
def_new = (low & MASK_DEF_LVTOFF) >> 4;
235+
if (!(low & MASK_DEF_LVTOFF)) {
236+
pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
237+
def_new = DEF_LVT_OFF;
238+
low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
239+
}
240+
241+
def_offset = setup_APIC_deferred_error(def_offset, def_new);
242+
if ((def_offset == def_new) &&
243+
(deferred_error_int_vector != amd_deferred_error_interrupt))
244+
deferred_error_int_vector = amd_deferred_error_interrupt;
245+
246+
low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
247+
wrmsr(MSR_CU_DEF_ERR, low, high);
248+
}
249+
208250
/* cpu init entry point, called from mce.c with preempt off */
209251
void mce_amd_feature_init(struct cpuinfo_x86 *c)
210252
{
@@ -252,7 +294,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
252294

253295
b.interrupt_enable = 1;
254296
new = (high & MASK_LVTOFF_HI) >> 20;
255-
offset = setup_APIC_mce(offset, new);
297+
offset = setup_APIC_mce_threshold(offset, new);
256298

257299
if ((offset == new) &&
258300
(mce_threshold_vector != amd_threshold_interrupt))
@@ -262,6 +304,73 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
262304
mce_threshold_block_init(&b, offset);
263305
}
264306
}
307+
308+
if (mce_flags.succor)
309+
deferred_error_interrupt_enable(c);
310+
}
311+
312+
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
313+
{
314+
struct mce m;
315+
u64 status;
316+
317+
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
318+
if (!(status & MCI_STATUS_VAL))
319+
return;
320+
321+
mce_setup(&m);
322+
323+
m.status = status;
324+
m.bank = bank;
325+
326+
if (threshold_err)
327+
m.misc = misc;
328+
329+
if (m.status & MCI_STATUS_ADDRV)
330+
rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
331+
332+
mce_log(&m);
333+
wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
334+
}
335+
336+
static inline void __smp_deferred_error_interrupt(void)
337+
{
338+
inc_irq_stat(irq_deferred_error_count);
339+
deferred_error_int_vector();
340+
}
341+
342+
asmlinkage __visible void smp_deferred_error_interrupt(void)
343+
{
344+
entering_irq();
345+
__smp_deferred_error_interrupt();
346+
exiting_ack_irq();
347+
}
348+
349+
asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
350+
{
351+
entering_irq();
352+
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
353+
__smp_deferred_error_interrupt();
354+
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
355+
exiting_ack_irq();
356+
}
357+
358+
/* APIC interrupt handler for deferred errors */
359+
static void amd_deferred_error_interrupt(void)
360+
{
361+
u64 status;
362+
unsigned int bank;
363+
364+
for (bank = 0; bank < mca_cfg.banks; ++bank) {
365+
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
366+
367+
if (!(status & MCI_STATUS_VAL) ||
368+
!(status & MCI_STATUS_DEFERRED))
369+
continue;
370+
371+
__log_error(bank, false, 0);
372+
break;
373+
}
265374
}
266375

267376
/*
@@ -273,12 +382,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
273382
* the interrupt goes off when error_count reaches threshold_limit.
274383
* the handler will simply log mcelog w/ software defined bank number.
275384
*/
385+
276386
static void amd_threshold_interrupt(void)
277387
{
278388
u32 low = 0, high = 0, address = 0;
279389
int cpu = smp_processor_id();
280390
unsigned int bank, block;
281-
struct mce m;
282391

283392
/* assume first bank caused it */
284393
for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -321,15 +430,7 @@ static void amd_threshold_interrupt(void)
321430
return;
322431

323432
log:
324-
mce_setup(&m);
325-
rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
326-
if (!(m.status & MCI_STATUS_VAL))
327-
return;
328-
m.misc = ((u64)high << 32) | low;
329-
m.bank = bank;
330-
mce_log(&m);
331-
332-
wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
433+
__log_error(bank, true, ((u64)high << 32) | low);
333434
}
334435

335436
/*

arch/x86/kernel/entry_64.S

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
935935
threshold_interrupt smp_threshold_interrupt
936936
#endif
937937

938+
#ifdef CONFIG_X86_MCE_AMD
939+
apicinterrupt DEFERRED_ERROR_VECTOR \
940+
deferred_error_interrupt smp_deferred_error_interrupt
941+
#endif
942+
938943
#ifdef CONFIG_X86_THERMAL_VECTOR
939944
apicinterrupt THERMAL_APIC_VECTOR \
940945
thermal_interrupt smp_thermal_interrupt

arch/x86/kernel/irq.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
116116
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
117117
seq_puts(p, " Threshold APIC interrupts\n");
118118
#endif
119+
#ifdef CONFIG_X86_MCE_AMD
120+
seq_printf(p, "%*s: ", prec, "DFR");
121+
for_each_online_cpu(j)
122+
seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
123+
seq_puts(p, " Deferred Error APIC interrupts\n");
124+
#endif
119125
#ifdef CONFIG_X86_MCE
120126
seq_printf(p, "%*s: ", prec, "MCE");
121127
for_each_online_cpu(j)

arch/x86/kernel/irqinit.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
135135
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
136136
#endif
137137

138+
#ifdef CONFIG_X86_MCE_AMD
139+
alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
140+
#endif
141+
138142
#ifdef CONFIG_X86_LOCAL_APIC
139143
/* self generated IPI for local APIC timer */
140144
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);

0 commit comments

Comments
 (0)