
Commit 899ba79

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:
 "Speculation:

   - Make the microcode check more robust

   - Make the L1TF memory limit depend on the internal cache physical
     address space and not on the CPUID advertised physical address
     space, which might be significantly smaller. This avoids disabling
     L1TF on machines which utilize the full physical address space.

   - Fix the GDT mapping for EFI calls on 32bit PTI

   - Fix the MCE nospec implementation to prevent #GP

  Fixes and robustness:

   - Use the proper operand order for LSL in the VDSO

   - Prevent NMI uaccess race against CR3 switching

   - Add a lockdep check to verify that text_mutex is held in
     text_poke() functions

   - Repair the fallout of giving native_restore_fl() a prototype

   - Prevent kernel memory dumps based on usermode RIP

   - Wipe KASAN shadow stack before rewinding the stack to prevent
     false positives

   - Move the ASM GOTO enforcement to the actual build stage to allow
     user API header extraction without a compiler

   - Fix a section mismatch introduced by the on demand VDSO mapping
     change

  Miscellaneous:

   - Trivial typo, GCC quirk removal and CC_SET/OUT() cleanups"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/pti: Fix section mismatch warning/error
  x86/vdso: Fix lsl operand order
  x86/mce: Fix set_mce_nospec() to avoid #GP fault
  x86/efi: Load fixmap GDT in efi_call_phys_epilog()
  x86/nmi: Fix NMI uaccess race against CR3 switching
  x86: Allow generating user-space headers without a compiler
  x86/dumpstack: Don't dump kernel memory based on usermode RIP
  x86/asm: Use CC_SET()/CC_OUT() in __gen_sigismember()
  x86/alternatives: Lockdep-enforce text_mutex in text_poke*()
  x86/entry/64: Wipe KASAN stack shadow before rewind_stack_do_exit()
  x86/irqflags: Mark native_restore_fl extern inline
  x86/build: Remove jump label quirk for GCC older than 4.5.2
  x86/Kconfig: Fix trivial typo
  x86/speculation/l1tf: Increase l1tf memory limit for Nehalem+
  x86/spectre: Add missing family 6 check to microcode check
2 parents 1395d10 + ff924c5 commit 899ba79

File tree: 21 files changed, 167 insertions(+), 50 deletions(-)

arch/x86/Kconfig

Lines changed: 1 addition & 1 deletion
@@ -2843,7 +2843,7 @@ config X86_SYSFB
 	  This option, if enabled, marks VGA/VBE/EFI framebuffers as generic
 	  framebuffers so the new generic system-framebuffer drivers can be
 	  used on x86. If the framebuffer is not compatible with the generic
-	  modes, it is adverticed as fallback platform framebuffer so legacy
+	  modes, it is advertised as fallback platform framebuffer so legacy
 	  drivers like efifb, vesafb and uvesafb can pick it up.
 	  If this option is not selected, all system framebuffers are always
 	  marked as fallback platform framebuffers as usual.

arch/x86/Makefile

Lines changed: 7 additions & 16 deletions
@@ -175,22 +175,6 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
   endif
 endif
 
-ifndef CC_HAVE_ASM_GOTO
-  $(error Compiler lacks asm-goto support.)
-endif
-
-#
-# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
-# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way
-# to test for this bug at compile-time because the test case needs to execute,
-# which is a no-go for cross compilers. So check the GCC version instead.
-#
-ifdef CONFIG_JUMP_LABEL
-  ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1)
-	ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1)
-  endif
-endif
-
 ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
 	# This compiler flag is not supported by Clang:
 	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)

@@ -312,6 +296,13 @@ PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
 
+archprepare: checkbin
+checkbin:
+ifndef CC_HAVE_ASM_GOTO
+	@echo Compiler lacks asm-goto support.
+	@exit 1
+endif
+
 archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
 	$(Q)rm -rf $(objtree)/arch/x86_64

arch/x86/events/core.c

Lines changed: 1 addition & 1 deletion
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 
 	perf_callchain_store(entry, regs->ip);
 
-	if (!current->mm)
+	if (!nmi_uaccess_okay())
 		return;
 
 	if (perf_callchain_user32(regs, entry))

arch/x86/include/asm/irqflags.h

Lines changed: 2 additions & 1 deletion
@@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void)
 	return flags;
 }
 
-static inline void native_restore_fl(unsigned long flags)
+extern inline void native_restore_fl(unsigned long flags);
+extern inline void native_restore_fl(unsigned long flags)
 {
 	asm volatile("push %0 ; popf"
		     : /* no output */
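Why the prototype plus extern inline pair: under the kernel's gnu89 inline rules (-fgnu89-inline), an extern inline definition is used only for inlining and never emits a standalone symbol, so the out-of-line copy has to come from somewhere else; for these irqflags helpers it is provided in assembly (arch/x86/kernel/irqflags.S). A minimal, hedged user-space sketch of that split, with purely illustrative names:

/*
 * Sketch only; build roughly as: gcc -fgnu89-inline -O2 -c caller.c helper.c
 *
 * caller.c -- the header-style definition:
 */
extern inline int twice(int x);		/* prototype first, as in irqflags.h */
extern inline int twice(int x)		/* body is used only for inlining */
{
	return 2 * x;
}

int use_it(int v)
{
	return twice(v);	/* may inline; otherwise calls the external symbol */
}

/*
 * helper.c -- a separate translation unit must provide the real symbol
 * (the kernel does this in assembly for native_restore_fl):
 *
 *	int twice(int x)
 *	{
 *		return 2 * x;
 *	}
 */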

arch/x86/include/asm/processor.h

Lines changed: 3 additions & 1 deletion
@@ -132,6 +132,8 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16	cpu_index;
 	u32	microcode;
+	/* Address space bits used by the cache internally */
+	u8	x86_cache_bits;
 	unsigned	initialized : 1;
 } __randomize_layout;
 

@@ -183,7 +185,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
 
 static inline unsigned long long l1tf_pfn_limit(void)
 {
-	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+	return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
 }
 
 extern void early_cpu_init(void);

arch/x86/include/asm/signal.h

Lines changed: 4 additions & 3 deletions
@@ -39,6 +39,7 @@ extern void do_signal(struct pt_regs *regs);
 
 #define __ARCH_HAS_SA_RESTORER
 
+#include <asm/asm.h>
 #include <uapi/asm/sigcontext.h>
 
 #ifdef __i386__

@@ -86,9 +87,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
 
 static inline int __gen_sigismember(sigset_t *set, int _sig)
 {
-	unsigned char ret;
-	asm("btl %2,%1\n\tsetc %0"
-	    : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+	bool ret;
+	asm("btl %2,%1" CC_SET(c)
+	    : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
 	return ret;
 }
 
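For reference, CC_SET()/CC_OUT() come from asm/asm.h (hence the new include) and wrap GCC's flag-output operands when the compiler supports them, falling back to an explicit setcc otherwise. A hedged user-space illustration of the flag-output form this compiles to on such compilers; bit_test() and its arguments are invented for the example:

/* Build with a flag-output capable compiler (gcc >= 6 or clang). */
#include <stdbool.h>
#include <stdio.h>

static bool bit_test(const unsigned long *word, int bit)
{
	bool ret;

	asm("btl %2, %1"
	    : "=@ccc" (ret)			/* carry flag is the result */
	    : "m" (*word), "Ir" (bit));
	return ret;
}

int main(void)
{
	unsigned long word = 0x5;		/* bits 0 and 2 set */

	printf("bit0=%d bit1=%d bit2=%d\n",
	       bit_test(&word, 0), bit_test(&word, 1), bit_test(&word, 2));
	return 0;
}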

arch/x86/include/asm/stacktrace.h

Lines changed: 1 addition & 1 deletion
@@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
 	return (unsigned long)frame;
 }
 
-void show_opcodes(u8 *rip, const char *loglvl);
+void show_opcodes(struct pt_regs *regs, const char *loglvl);
 void show_ip(struct pt_regs *regs, const char *loglvl);
 #endif /* _ASM_X86_STACKTRACE_H */

arch/x86/include/asm/tlbflush.h

Lines changed: 40 additions & 0 deletions
@@ -175,8 +175,16 @@ struct tlb_state {
 	 * are on. This means that it may not match current->active_mm,
 	 * which will contain the previous user mm when we're in lazy TLB
 	 * mode even if we've already switched back to swapper_pg_dir.
+	 *
+	 * During switch_mm_irqs_off(), loaded_mm will be set to
+	 * LOADED_MM_SWITCHING during the brief interrupts-off window
+	 * when CR3 and loaded_mm would otherwise be inconsistent. This
+	 * is for nmi_uaccess_okay()'s benefit.
 	 */
 	struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
 	u16 loaded_mm_asid;
 	u16 next_asid;
 	/* last user mm's ctx id */

@@ -246,6 +254,38 @@ struct tlb_state {
 };
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	struct mm_struct *current_mm = current->mm;
+
+	VM_WARN_ON_ONCE(!loaded_mm);
+
+	/*
+	 * The condition we want to check is
+	 * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+	 * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+	 * is supposed to be reasonably fast.
+	 *
+	 * Instead, we check the almost equivalent but somewhat conservative
	 * condition below, and we rely on the fact that switch_mm_irqs_off()
+	 * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+	 */
+	if (loaded_mm != current_mm)
+		return false;
+
+	VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+
+	return true;
+}
+
 /* Initialize cr4 shadow for this CPU. */
 static inline void cr4_init_shadow(void)
 {
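The guarantee nmi_uaccess_okay() leans on is the store ordering in switch_mm_irqs_off(). A heavily simplified, hedged sketch of that ordering follows; the real code in arch/x86/mm/tlb.c also handles ASIDs, lazy TLB and tracing, and write_cr3()/__pa() are used here only to keep the sketch short:

/* Not the real switch_mm_irqs_off(); ordering sketch only. */
static void switch_mm_ordering_sketch(struct mm_struct *next)
{
	/*
	 * 1) Publish the "in flux" marker so an NMI arriving in the window
	 *    below sees loaded_mm != current->mm and backs off.
	 */
	this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
	barrier();

	/* 2) Switch page tables (PCID/ASID handling omitted). */
	write_cr3(__pa(next->pgd));

	/* 3) Only now expose the new mm to nmi_uaccess_okay(). */
	barrier();
	this_cpu_write(cpu_tlbstate.loaded_mm, next);
}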

arch/x86/include/asm/vgtod.h

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
 	 *
 	 * If RDPID is available, use it.
 	 */
-	alternative_io ("lsl %[p],%[seg]",
+	alternative_io ("lsl %[seg],%[p]",
 			".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
 			X86_FEATURE_RDPID,
 			[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));

arch/x86/kernel/alternative.c

Lines changed: 5 additions & 4 deletions
@@ -684,8 +684,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
  * It means the size must be writable atomically and the address must be aligned
  * in a way that permits an atomic write. It also makes sure we fit on a single
  * page.
- *
- * Note: Must be called under text_mutex.
  */
 void *text_poke(void *addr, const void *opcode, size_t len)
 {

@@ -700,6 +698,8 @@ void *text_poke(void *addr, const void *opcode, size_t len)
 	 */
 	BUG_ON(!after_bootmem);
 
+	lockdep_assert_held(&text_mutex);
+
 	if (!core_kernel_text((unsigned long)addr)) {
 		pages[0] = vmalloc_to_page(addr);
 		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);

@@ -782,8 +782,6 @@ int poke_int3_handler(struct pt_regs *regs)
  *	- replace the first byte (int3) by the first byte of
 *	  replacing opcode
 *	- sync cores
- *
- * Note: must be called under text_mutex.
  */
 void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 {

@@ -792,6 +790,9 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 	bp_int3_handler = handler;
 	bp_int3_addr = (u8 *)addr + sizeof(int3);
 	bp_patching_in_progress = true;
+
+	lockdep_assert_held(&text_mutex);
+
 	/*
 	 * Corresponding read barrier in int3 notifier for making sure the
 	 * in_progress and handler are correctly ordered wrt. patching.
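The free-text "must be called under text_mutex" notes thereby become an enforced rule: with lockdep enabled, a caller that forgets the lock now triggers a warning. The calling convention the assertion expects looks roughly like this hedged fragment (not taken verbatim from any call site):

	mutex_lock(&text_mutex);
	text_poke(addr, new_insn, len);	/* lockdep_assert_held() is satisfied */
	mutex_unlock(&text_mutex);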

arch/x86/kernel/cpu/bugs.c

Lines changed: 41 additions & 5 deletions
@@ -668,13 +668,54 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation);
 enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
 
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+	if (c->x86 != 6)
+		return;
+
+	switch (c->x86_model) {
+	case INTEL_FAM6_NEHALEM:
+	case INTEL_FAM6_WESTMERE:
+	case INTEL_FAM6_SANDYBRIDGE:
+	case INTEL_FAM6_IVYBRIDGE:
+	case INTEL_FAM6_HASWELL_CORE:
+	case INTEL_FAM6_HASWELL_ULT:
+	case INTEL_FAM6_HASWELL_GT3E:
+	case INTEL_FAM6_BROADWELL_CORE:
+	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_SKYLAKE_MOBILE:
+	case INTEL_FAM6_SKYLAKE_DESKTOP:
+	case INTEL_FAM6_KABYLAKE_MOBILE:
+	case INTEL_FAM6_KABYLAKE_DESKTOP:
+		if (c->x86_cache_bits < 44)
+			c->x86_cache_bits = 44;
+		break;
+	}
+}
+
 static void __init l1tf_select_mitigation(void)
 {
 	u64 half_pa;
 
 	if (!boot_cpu_has_bug(X86_BUG_L1TF))
 		return;
 
+	override_cache_bits(&boot_cpu_data);
+
 	switch (l1tf_mitigation) {
 	case L1TF_MITIGATION_OFF:
 	case L1TF_MITIGATION_FLUSH_NOWARN:

@@ -694,11 +735,6 @@ static void __init l1tf_select_mitigation(void)
 		return;
 #endif
 
-	/*
-	 * This is extremely unlikely to happen because almost all
-	 * systems have far more MAX_PA/2 than RAM can be fit into
-	 * DIMM slots.
-	 */
 	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
 	if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
 		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
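The comment above can be made concrete with a little arithmetic: with the CPUID-reported 36 bits, l1tf_pfn_limit() puts MAX_PA/2 at 32 GB, exactly where a 32 GB machine with memory holes ends up, while the 44 bits the cache actually uses push the limit to 8 TB. A hedged, standalone user-space check of those numbers (PAGE_SHIFT and BIT_ULL simply mirror the kernel definitions):

#include <stdio.h>

#define PAGE_SHIFT 12
#define BIT_ULL(n) (1ULL << (n))

static unsigned long long pfn_limit(unsigned int addr_bits)
{
	/* Same formula as l1tf_pfn_limit(): highest safe PFN is MAX_PA/2. */
	return BIT_ULL(addr_bits - 1 - PAGE_SHIFT);
}

int main(void)
{
	/*
	 * 36 reported bits -> limit at 32 GB, so a 32 GB box with memory
	 * holes trips the e820 range check and gets a false warning.
	 * 44 cache bits -> limit at 8192 GB, mitigation stays valid.
	 */
	printf("36 phys bits  -> limit %llu GB\n",
	       (pfn_limit(36) << PAGE_SHIFT) >> 30);
	printf("44 cache bits -> limit %llu GB\n",
	       (pfn_limit(44) << PAGE_SHIFT) >> 30);
	return 0;
}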

arch/x86/kernel/cpu/common.c

Lines changed: 1 addition & 0 deletions
@@ -919,6 +919,7 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
 		c->x86_phys_bits = 36;
 #endif
+	c->x86_cache_bits = c->x86_phys_bits;
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)

arch/x86/kernel/cpu/intel.c

Lines changed: 3 additions & 0 deletions
@@ -150,6 +150,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_HYPERVISOR))
 		return false;
 
+	if (c->x86 != 6)
+		return false;
+
 	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
 		if (c->x86_model == spectre_bad_microcodes[i].model &&
 		    c->x86_stepping == spectre_bad_microcodes[i].stepping)

arch/x86/kernel/dumpstack.c

Lines changed: 17 additions & 3 deletions
@@ -17,6 +17,7 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
+#include <linux/kasan.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>

@@ -89,14 +90,24 @@ static void printk_stack_address(unsigned long address, int reliable,
 * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
 * guesstimate in attempt to achieve all of the above.
 */
-void show_opcodes(u8 *rip, const char *loglvl)
+void show_opcodes(struct pt_regs *regs, const char *loglvl)
 {
 #define PROLOGUE_SIZE 42
 #define EPILOGUE_SIZE 21
 #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
 	u8 opcodes[OPCODE_BUFSIZE];
+	unsigned long prologue = regs->ip - PROLOGUE_SIZE;
+	bool bad_ip;
 
-	if (probe_kernel_read(opcodes, rip - PROLOGUE_SIZE, OPCODE_BUFSIZE)) {
+	/*
+	 * Make sure userspace isn't trying to trick us into dumping kernel
+	 * memory by pointing the userspace instruction pointer at it.
+	 */
+	bad_ip = user_mode(regs) &&
+		__chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
+
+	if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue,
+					OPCODE_BUFSIZE)) {
 		printk("%sCode: Bad RIP value.\n", loglvl);
 	} else {
 		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"

@@ -112,7 +123,7 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
 #else
 	printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
 #endif
-	show_opcodes((u8 *)regs->ip, loglvl);
+	show_opcodes(regs, loglvl);
 }
 
 void show_iret_regs(struct pt_regs *regs)

@@ -346,7 +357,10 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	 * We're not going to return, but we might be on an IST stack or
 	 * have very little stack space left. Rewind the stack and kill
 	 * the task.
+	 * Before we rewind the stack, we have to tell KASAN that we're going to
+	 * reuse the task stack and that existing poisons are invalid.
 	 */
+	kasan_unpoison_task_stack(current);
 	rewind_stack_do_exit(signr);
 }
 NOKPROBE_SYMBOL(oops_end);

arch/x86/lib/usercopy.c

Lines changed: 5 additions & 0 deletions
@@ -7,6 +7,8 @@
 #include <linux/uaccess.h>
 #include <linux/export.h>
 
+#include <asm/tlbflush.h>
+
 /*
 * We rely on the nested NMI work to allow atomic faults from the NMI path; the
 * nested NMI paths are careful to preserve CR2.

@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	if (__range_not_ok(from, n, TASK_SIZE))
 		return n;
 
+	if (!nmi_uaccess_okay())
+		return n;
+
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
 	 * disable pagefaults so that its behaviour is consistent even when

arch/x86/mm/fault.c

Lines changed: 1 addition & 1 deletion
@@ -837,7 +837,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 	printk(KERN_CONT "\n");
 
-	show_opcodes((u8 *)regs->ip, loglvl);
+	show_opcodes(regs, loglvl);
 }
 
 static void
