
Commit 284ce40

djbw authored and davejiang committed
x86/memory_failure: Introduce {set, clear}_mce_nospec()
Currently memory_failure() returns zero if the error was handled. On that result mce_unmap_kpfn() is called to zap the page out of the kernel linear mapping to prevent speculative fetches of potentially poisoned memory. However, in the case of dax mapped devmap pages the page may be in active permanent use by the device driver, so it cannot be unmapped from the kernel.

Instead of marking the page not present, marking the page UC should be sufficient for preventing poison from being pre-fetched into the cache. Convert mce_unmap_kpfn() to set_mce_nospec(), remapping the page as UC to hide it from speculative accesses.

Given that persistent memory errors can be cleared by the driver, include a facility to restore the page to cacheable operation, clear_mce_nospec().

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: <linux-edac@vger.kernel.org>
Cc: <x86@kernel.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 510ee09 commit 284ce40
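
To make the intended usage concrete, here is a minimal, hypothetical sketch of how the two helpers pair up. The memory_failure() hook mirrors the mce.c call sites converted below; the driver-side clear path is an assumption, since this commit adds no driver caller:

	/*
	 * Illustrative sketch only, not part of this commit.
	 */
	#include <linux/mm.h>
	#include <linux/set_memory.h>

	static void handle_poisoned_pfn(unsigned long pfn)
	{
		/* memory_failure() returns 0 when the error was handled */
		if (!memory_failure(pfn, 0))
			set_mce_nospec(pfn);	/* remap UC instead of unmapping */
	}

	/* Hypothetical driver path: poison has been cleared in hardware */
	static void pmem_poison_cleared(unsigned long pfn)
	{
		clear_mce_nospec(pfn);	/* restore write-back caching */
	}

Unlike the old mce_unmap_kpfn() approach, the page stays mapped throughout, which is what allows an actively used devmap page to survive recovery.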

File tree

4 files changed, +59 −50 lines changed


arch/x86/include/asm/set_memory.h

Lines changed: 42 additions & 0 deletions
@@ -88,4 +88,46 @@ extern int kernel_set_to_readonly;
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
 
+#ifdef CONFIG_X86_64
+static inline int set_mce_nospec(unsigned long pfn)
+{
+	unsigned long decoy_addr;
+	int rc;
+
+	/*
+	 * Mark the linear address as UC to make sure we don't log more
+	 * errors because of speculative access to the page.
+	 * We would like to just call:
+	 *	set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
+	 * but doing that would radically increase the odds of a
+	 * speculative access to the poison page because we'd have
+	 * the virtual address of the kernel 1:1 mapping sitting
+	 * around in registers.
+	 * Instead we get tricky. We create a non-canonical address
+	 * that looks just like the one we want, but has bit 63 flipped.
+	 * This relies on set_memory_uc() properly sanitizing any __pa()
+	 * results with __PHYSICAL_MASK or PTE_PFN_MASK.
+	 */
+	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+
+	rc = set_memory_uc(decoy_addr, 1);
+	if (rc)
+		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
+	return rc;
+}
+#define set_mce_nospec set_mce_nospec
+
+/* Restore full speculative operation to the pfn. */
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+	return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
+}
+#define clear_mce_nospec clear_mce_nospec
+#else
+/*
+ * Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+#endif
+
 #endif /* _ASM_X86_SET_MEMORY_H */
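
A standalone illustration of the decoy-address arithmetic in the comment above may help. This userspace sketch uses an illustrative PAGE_OFFSET value (the real kernel constant depends on configuration and KASLR); flipping bit 63 yields a non-canonical address that still decodes to the same pfn once __pa() masks it with __PHYSICAL_MASK:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	/* Illustrative x86_64 direct-map base; real value is config-dependent */
	#define PAGE_OFFSET	0xffff880000000000UL
	#define BIT63		(1UL << 63)

	int main(void)
	{
		unsigned long pfn = 0x12345;

		/* canonical 1:1-map address, which set_mce_nospec() avoids
		 * ever materializing in a register */
		unsigned long kaddr = (pfn << PAGE_SHIFT) + PAGE_OFFSET;
		/* same arithmetic as set_mce_nospec(): bit 63 flipped */
		unsigned long decoy = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT63);

		printf("1:1 map address: 0x%016lx (canonical)\n", kaddr);
		printf("decoy address:   0x%016lx (non-canonical)\n", decoy);
		return 0;
	}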

arch/x86/kernel/cpu/mcheck/mce-internal.h

Lines changed: 0 additions & 15 deletions
@@ -113,21 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb) { }
 static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
 #endif
 
-#ifndef CONFIG_X86_64
-/*
- * On 32-bit systems it would be difficult to safely unmap a poison page
- * from the kernel 1:1 map because there are no non-canonical addresses that
- * we can use to refer to the address without risking a speculative access.
- * However, this isn't much of an issue because:
- * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
- *    are only mapped into the kernel as needed
- * 2) Few people would run a 32-bit kernel on a machine that supports
- *    recoverable errors because they have too much memory to boot 32-bit.
- */
-static inline void mce_unmap_kpfn(unsigned long pfn) {}
-#define mce_unmap_kpfn mce_unmap_kpfn
-#endif
-
 struct mca_config {
 	bool dont_log_ce;
 	bool cmci_disabled;

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 3 additions & 35 deletions
@@ -42,6 +42,7 @@
 #include <linux/irq_work.h>
 #include <linux/export.h>
 #include <linux/jump_label.h>
+#include <linux/set_memory.h>
 
 #include <asm/intel-family.h>
 #include <asm/processor.h>
@@ -50,7 +51,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/reboot.h>
-#include <asm/set_memory.h>
 
 #include "mce-internal.h"
 
@@ -108,10 +108,6 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn);
-#endif
-
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -602,7 +598,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
 		pfn = mce->addr >> PAGE_SHIFT;
 		if (!memory_failure(pfn, 0))
-			mce_unmap_kpfn(pfn);
+			set_mce_nospec(pfn);
 	}
 
 	return NOTIFY_OK;
@@ -1072,38 +1068,10 @@ static int do_memory_failure(struct mce *m)
 	if (ret)
 		pr_err("Memory error not recovered");
 	else
-		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
+		set_mce_nospec(m->addr >> PAGE_SHIFT);
 	return ret;
 }
 
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn)
-{
-	unsigned long decoy_addr;
-
-	/*
-	 * Unmap this page from the kernel 1:1 mappings to make sure
-	 * we don't log more errors because of speculative access to
-	 * the page.
-	 * We would like to just call:
-	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
-	 * but doing that would radically increase the odds of a
-	 * speculative access to the poison page because we'd have
-	 * the virtual address of the kernel 1:1 mapping sitting
-	 * around in registers.
-	 * Instead we get tricky. We create a non-canonical address
-	 * that looks just like the one we want, but has bit 63 flipped.
-	 * This relies on set_memory_np() not checking whether we passed
-	 * a legal address.
-	 */
-
-	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-
-	if (set_memory_np(decoy_addr, 1))
-		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-}
-#endif
-
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.

include/linux/set_memory.h

Lines changed: 14 additions & 0 deletions
@@ -17,6 +17,20 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef set_mce_nospec
+static inline int set_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
+#ifndef clear_mce_nospec
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
 #ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
 static inline int set_memory_encrypted(unsigned long addr, int numpages)
 {
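
These fallbacks rely on the self-referential "#define set_mce_nospec set_mce_nospec" marker emitted by the x86 header above, so the generic stub compiles out whenever an architecture provides a real implementation. A condensed, standalone sketch of that pattern (illustrative names and the ARCH_HAS_MCE_NOSPEC switch are hypothetical, not kernel code):

	#include <stdio.h>

	/* "arch" side: real implementation plus marker macro */
	#ifdef ARCH_HAS_MCE_NOSPEC
	static inline int set_mce_nospec(unsigned long pfn)
	{
		printf("arch: remap pfn 0x%lx as UC\n", pfn);
		return 0;
	}
	#define set_mce_nospec set_mce_nospec
	#endif

	/* "generic" side: only seen when the arch did not define the macro */
	#ifndef set_mce_nospec
	static inline int set_mce_nospec(unsigned long pfn)
	{
		return 0;	/* no-op fallback */
	}
	#endif

	int main(void)
	{
		/* build with -DARCH_HAS_MCE_NOSPEC to select the arch version */
		return set_mce_nospec(0x12345);
	}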
