
Commit d82924c

Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti updates from Ingo Molnar:
 "The main changes:

   - Make the IBPB barrier more strict and add STIBP support (Jiri Kosina)

   - Micro-optimize and clean up the entry code (Andy Lutomirski)

   - ... plus misc other fixes"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/speculation: Propagate information about RSB filling mitigation to sysfs
  x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
  x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
  x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
  x86/CPU: Fix unused variable warning when !CONFIG_IA32_EMULATION
  x86/pti/64: Remove the SYSCALL64 entry trampoline
  x86/entry/64: Use the TSS sp2 slot for SYSCALL/SYSRET scratch space
  x86/entry/64: Document idtentry
2 parents d7197a5 + bb4b3b7 commit d82924c

19 files changed: +222 additions, -176 deletions

arch/x86/entry/entry_64.S

Lines changed: 45 additions & 72 deletions
@@ -142,67 +142,6 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
-        .pushsection .entry_trampoline, "ax"
-
-/*
- * The code in here gets remapped into cpu_entry_area's trampoline. This means
- * that the assembler and linker have the wrong idea as to where this code
- * lives (and, in fact, it's mapped more than once, so it's not even at a
- * fixed address). So we can't reference any symbols outside the entry
- * trampoline and expect it to work.
- *
- * Instead, we carefully abuse %rip-relative addressing.
- * _entry_trampoline(%rip) refers to the start of the remapped) entry
- * trampoline. We can thus find cpu_entry_area with this macro:
- */
-
-#define CPU_ENTRY_AREA \
-        _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
-
-/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
-#define RSP_SCRATCH     CPU_ENTRY_AREA_entry_stack + \
-                        SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
-
-ENTRY(entry_SYSCALL_64_trampoline)
-        UNWIND_HINT_EMPTY
-        swapgs
-
-        /* Stash the user RSP. */
-        movq    %rsp, RSP_SCRATCH
-
-        /* Note: using %rsp as a scratch reg. */
-        SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-
-        /* Load the top of the task stack into RSP */
-        movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
-
-        /* Start building the simulated IRET frame. */
-        pushq   $__USER_DS              /* pt_regs->ss */
-        pushq   RSP_SCRATCH             /* pt_regs->sp */
-        pushq   %r11                    /* pt_regs->flags */
-        pushq   $__USER_CS              /* pt_regs->cs */
-        pushq   %rcx                    /* pt_regs->ip */
-
-        /*
-         * x86 lacks a near absolute jump, and we can't jump to the real
-         * entry text with a relative jump. We could push the target
-         * address and then use retq, but this destroys the pipeline on
-         * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
-         * spill RDI and restore it in a second-stage trampoline.
-         */
-        pushq   %rdi
-        movq    $entry_SYSCALL_64_stage2, %rdi
-        JMP_NOSPEC %rdi
-END(entry_SYSCALL_64_trampoline)
-
-        .popsection
-
-ENTRY(entry_SYSCALL_64_stage2)
-        UNWIND_HINT_EMPTY
-        popq    %rdi
-        jmp     entry_SYSCALL_64_after_hwframe
-END(entry_SYSCALL_64_stage2)
-
 ENTRY(entry_SYSCALL_64)
         UNWIND_HINT_EMPTY
         /*
@@ -212,21 +151,19 @@ ENTRY(entry_SYSCALL_64)
         */
 
        swapgs
-       /*
-        * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
-        * is not required to switch CR3.
-        */
-       movq    %rsp, PER_CPU_VAR(rsp_scratch)
+       /* tss.sp2 is scratch space. */
+       movq    %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
        /* Construct struct pt_regs on stack */
-       pushq   $__USER_DS                      /* pt_regs->ss */
-       pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
-       pushq   %r11                            /* pt_regs->flags */
-       pushq   $__USER_CS                      /* pt_regs->cs */
-       pushq   %rcx                            /* pt_regs->ip */
+       pushq   $__USER_DS                              /* pt_regs->ss */
+       pushq   PER_CPU_VAR(cpu_tss_rw + TSS_sp2)       /* pt_regs->sp */
+       pushq   %r11                                    /* pt_regs->flags */
+       pushq   $__USER_CS                              /* pt_regs->cs */
+       pushq   %rcx                                    /* pt_regs->ip */
 GLOBAL(entry_SYSCALL_64_after_hwframe)
-       pushq   %rax                            /* pt_regs->orig_ax */
+       pushq   %rax                                    /* pt_regs->orig_ax */
 
        PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 
@@ -900,6 +837,42 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
 
+/**
+ * idtentry - Generate an IDT entry stub
+ * @sym:            Name of the generated entry point
+ * @do_sym:         C function to be called
+ * @has_error_code: True if this IDT vector has an error code on the stack
+ * @paranoid:       non-zero means that this vector may be invoked from
+ *                  kernel mode with user GSBASE and/or user CR3.
+ *                  2 is special -- see below.
+ * @shift_ist:      Set to an IST index if entries from kernel mode should
+ *                  decrement the IST stack so that nested entries get a
+ *                  fresh stack. (This is for #DB, which has a nasty habit
+ *                  of recursing.)
+ *
+ * idtentry generates an IDT stub that sets up a usable kernel context,
+ * creates struct pt_regs, and calls @do_sym. The stub has the following
+ * special behaviors:
+ *
+ * On an entry from user mode, the stub switches from the trampoline or
+ * IST stack to the normal thread stack. On an exit to user mode, the
+ * normal exit-to-usermode path is invoked.
+ *
+ * On an exit to kernel mode, if @paranoid == 0, we check for preemption,
+ * whereas we omit the preemption check if @paranoid != 0. This is purely
+ * because the implementation is simpler this way. The kernel only needs
+ * to check for asynchronous kernel preemption when IRQ handlers return.
+ *
+ * If @paranoid == 0, then the stub will handle IRET faults by pretending
+ * that the fault came from user mode. It will handle gs_change faults by
+ * pretending that the fault happened with kernel GSBASE. Since this handling
+ * is omitted for @paranoid != 0, the #GP, #SS, and #NP stubs must have
+ * @paranoid == 0. This special handling will do the wrong thing for
+ * espfix-induced #DF on IRET, so #DF must not use @paranoid == 0.
+ *
+ * @paranoid == 2 is special: the stub will never switch stacks. This is for
+ * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
+ */
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
         UNWIND_HINT_IRET_REGS offset=\has_error_code*8

arch/x86/include/asm/cpu_entry_area.h

Lines changed: 0 additions & 2 deletions
@@ -30,8 +30,6 @@ struct cpu_entry_area {
          */
         struct tss_struct tss;
 
-        char entry_trampoline[PAGE_SIZE];
-
 #ifdef CONFIG_X86_64
         /*
          * Exception stacks used for IST entries.

arch/x86/include/asm/nospec-branch.h

Lines changed: 13 additions & 4 deletions
@@ -170,11 +170,15 @@
  */
 # define CALL_NOSPEC                                            \
         ANNOTATE_NOSPEC_ALTERNATIVE                             \
-        ALTERNATIVE(                                            \
+        ALTERNATIVE_2(                                          \
         ANNOTATE_RETPOLINE_SAFE                                 \
         "call *%[thunk_target]\n",                              \
         "call __x86_indirect_thunk_%V[thunk_target]\n",         \
-        X86_FEATURE_RETPOLINE)
+        X86_FEATURE_RETPOLINE,                                  \
+        "lfence;\n"                                             \
+        ANNOTATE_RETPOLINE_SAFE                                 \
+        "call *%[thunk_target]\n",                              \
+        X86_FEATURE_RETPOLINE_AMD)
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
 #elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
@@ -184,7 +188,8 @@
  * here, anyway.
  */
 # define CALL_NOSPEC                                            \
-        ALTERNATIVE(                                            \
+        ANNOTATE_NOSPEC_ALTERNATIVE                             \
+        ALTERNATIVE_2(                                          \
         ANNOTATE_RETPOLINE_SAFE                                 \
         "call *%[thunk_target]\n",                              \
         "       jmp    904f;\n"                                 \
@@ -199,7 +204,11 @@
         "       ret;\n"                                         \
         "       .align 16\n"                                    \
         "904:   call   901b;\n",                                \
-        X86_FEATURE_RETPOLINE)
+        X86_FEATURE_RETPOLINE,                                  \
+        "lfence;\n"                                             \
+        ANNOTATE_RETPOLINE_SAFE                                 \
+        "call *%[thunk_target]\n",                              \
+        X86_FEATURE_RETPOLINE_AMD)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #else /* No retpoline for C / inline asm */
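The hunks above widen CALL_NOSPEC from ALTERNATIVE() to ALTERNATIVE_2(), so that besides the retpoline thunk an LFENCE-serialized indirect call can be patched in when X86_FEATURE_RETPOLINE_AMD is set. As a rough illustration, here is a minimal, hypothetical C caller (not part of this diff; the helper name, operand wiring, and clobber list are assumptions modeled on existing kernel users of CALL_NOSPEC/THUNK_TARGET):

    #include <asm/nospec-branch.h>

    /*
     * Hypothetical kernel-context helper: make a speculation-safe indirect
     * call to fn(arg). The alternatives framework rewrites the CALL_NOSPEC
     * site at boot into a plain "call *reg", a retpoline thunk call, or
     * (with this change) "lfence; call *reg" on AMD.
     */
    static int indirect_call_nospec(int (*fn)(int), int arg)
    {
            int ret;

            asm volatile(CALL_NOSPEC
                         : "=a" (ret)                  /* fn's return value (%rax) */
                         : THUNK_TARGET(fn), "D" (arg) /* call target + first argument (%rdi) */
                         : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11",
                           "cc", "memory");            /* caller-saved state the callee may clobber */
            return ret;
    }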

arch/x86/include/asm/processor.h

Lines changed: 6 additions & 0 deletions
@@ -316,7 +316,13 @@ struct x86_hw_tss {
          */
         u64                     sp1;
 
+        /*
+         * Since Linux does not use ring 2, the 'sp2' slot is unused by
+         * hardware. entry_SYSCALL_64 uses it as scratch space to stash
+         * the user RSP value.
+         */
         u64                     sp2;
+
         u64                     reserved2;
         u64                     ist[7];
         u32                     reserved3;

arch/x86/include/asm/sections.h

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@ extern char __end_rodata_aligned[];
 
 #if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
-extern char __entry_trampoline_start[], __entry_trampoline_end[];
 #endif
 
 #endif  /* _ASM_X86_SECTIONS_H */

arch/x86/kernel/asm-offsets.c

Lines changed: 2 additions & 3 deletions
@@ -96,13 +96,12 @@ void common(void) {
         OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
 
         /* Layout info for cpu_entry_area */
-        OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-        OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
         OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
         DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
         DEFINE(MASK_entry_stack, (~(sizeof(struct entry_stack) - 1)));
 
-        /* Offset for sp0 and sp1 into the tss_struct */
+        /* Offset for fields in tss_struct */
         OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
         OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+        OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
 }
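For context on what the new OFFSET(TSS_sp2, ...) line produces: asm-offsets.c is compiled and scraped at build time into generated #define constants, so TSS_sp2 becomes the byte offset of x86_tss.sp2 inside struct tss_struct, which is what lets entry_64.S address the slot as PER_CPU_VAR(cpu_tss_rw + TSS_sp2). A self-contained toy sketch of the same idea (the struct and symbol names below are illustrative stand-ins, not the kernel's):

    #include <stddef.h>
    #include <stdio.h>

    /* Toy stand-ins for struct x86_hw_tss / struct tss_struct. */
    struct toy_hw_tss { unsigned int reserved1; unsigned long sp0, sp1, sp2; };
    struct toy_tss    { struct toy_hw_tss x86_tss; };

    int main(void)
    {
            /*
             * The kernel's OFFSET() macro boils down to emitting an
             * offsetof() value that the build turns into a #define
             * usable from assembly.
             */
            printf("#define TSS_sp2 %zu\n",
                   offsetof(struct toy_tss, x86_tss.sp2));
            return 0;
    }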

arch/x86/kernel/cpu/bugs.c

Lines changed: 52 additions & 6 deletions
@@ -35,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
 static void __init l1tf_select_mitigation(void);
 
-/*
- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
- * writes to SPEC_CTRL contain whatever reserved bits have been set.
- */
-u64 __ro_after_init x86_spec_ctrl_base;
+/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+u64 x86_spec_ctrl_base;
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+static DEFINE_MUTEX(spec_ctrl_mutex);
 
 /*
  * The vendor and possibly platform specific bits which can be modified in
@@ -326,6 +324,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
         return cmd;
 }
 
+static bool stibp_needed(void)
+{
+        if (spectre_v2_enabled == SPECTRE_V2_NONE)
+                return false;
+
+        if (!boot_cpu_has(X86_FEATURE_STIBP))
+                return false;
+
+        return true;
+}
+
+static void update_stibp_msr(void *info)
+{
+        wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+}
+
+void arch_smt_update(void)
+{
+        u64 mask;
+
+        if (!stibp_needed())
+                return;
+
+        mutex_lock(&spec_ctrl_mutex);
+        mask = x86_spec_ctrl_base;
+        if (cpu_smt_control == CPU_SMT_ENABLED)
+                mask |= SPEC_CTRL_STIBP;
+        else
+                mask &= ~SPEC_CTRL_STIBP;
+
+        if (mask != x86_spec_ctrl_base) {
+                pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
+                        cpu_smt_control == CPU_SMT_ENABLED ?
+                        "Enabling" : "Disabling");
+                x86_spec_ctrl_base = mask;
+                on_each_cpu(update_stibp_msr, NULL, 1);
+        }
+        mutex_unlock(&spec_ctrl_mutex);
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
         enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -426,6 +464,9 @@ static void __init spectre_v2_select_mitigation(void)
                 setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
                 pr_info("Enabling Restricted Speculation for firmware calls\n");
         }
+
+        /* Enable STIBP if appropriate */
+        arch_smt_update();
 }
 
 #undef pr_fmt
@@ -816,6 +857,8 @@ static ssize_t l1tf_show_state(char *buf)
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
                                char *buf, unsigned int bug)
 {
+        int ret;
+
         if (!boot_cpu_has_bug(bug))
                 return sprintf(buf, "Not affected\n");
 
@@ -833,10 +876,13 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
                 return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 
         case X86_BUG_SPECTRE_V2:
-                return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+                ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
                                boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
                                boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+                               (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "",
+                               boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
                                spectre_v2_module_string());
+                return ret;
 
         case X86_BUG_SPEC_STORE_BYPASS:
                 return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
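With the last two hunks, the spectre_v2 sysfs string can now also report ", STIBP" and ", RSB filling". A small user-space sketch for reading it (the sysfs path is the standard one; the output quoted in the comment is only one possible form of the string):

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

            if (!f) {
                    perror("spectre_v2");
                    return 1;
            }
            /* e.g. "Mitigation: Full generic retpoline, IBPB, IBRS_FW, STIBP, RSB filling" */
            if (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }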

arch/x86/kernel/cpu/common.c

Lines changed: 3 additions & 13 deletions
@@ -1534,19 +1534,8 @@ EXPORT_PER_CPU_SYMBOL(__preempt_count);
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-        extern char _entry_trampoline[];
-        extern char entry_SYSCALL_64_trampoline[];
-
-        int cpu = smp_processor_id();
-        unsigned long SYSCALL64_entry_trampoline =
-                (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
-                (entry_SYSCALL_64_trampoline - _entry_trampoline);
-
         wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-        if (static_cpu_has(X86_FEATURE_PTI))
-                wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
-        else
-                wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+        wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
         wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1557,7 +1546,8 @@ void syscall_init(void)
          * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
          */
         wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-        wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+        wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+                    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
         wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
         wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);

arch/x86/kernel/kprobes/core.c

Lines changed: 1 addition & 9 deletions
@@ -1028,18 +1028,10 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
-        bool is_in_entry_trampoline_section = false;
-
-#ifdef CONFIG_X86_64
-        is_in_entry_trampoline_section =
-                (addr >= (unsigned long)__entry_trampoline_start &&
-                 addr < (unsigned long)__entry_trampoline_end);
-#endif
         return (addr >= (unsigned long)__kprobes_text_start &&
                 addr < (unsigned long)__kprobes_text_end) ||
                (addr >= (unsigned long)__entry_text_start &&
-                addr < (unsigned long)__entry_text_end) ||
-               is_in_entry_trampoline_section;
+                addr < (unsigned long)__entry_text_end);
 }
 
 int __init arch_init_kprobes(void)

arch/x86/kernel/process_64.c

Lines changed: 0 additions & 2 deletions
@@ -60,8 +60,6 @@
 #include <asm/unistd_32_ia32.h>
 #endif
 
-__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
-
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 {

arch/x86/kernel/traps.c

Lines changed: 4 additions & 0 deletions
@@ -383,6 +383,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
          * we won't enable interupts or schedule before we invoke
          * general_protection, so nothing will clobber the stack
          * frame we just set up.
+         *
+         * We will enter general_protection with kernel GSBASE,
+         * which is what the stub expects, given that the faulting
+         * RIP will be the IRET instruction.
          */
         regs->ip = (unsigned long)general_protection;
         regs->sp = (unsigned long)&gpregs->orig_ax;
