Commit 1d3e53e

amluto authored and Ingo Molnar committed
x86/entry/64: Refactor IRQ stacks and make them NMI-safe
This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
  and ORC unwinder's lives a little easier. ]
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
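The nesting invariant the new macros maintain can be made concrete with a small standalone C sketch (illustrative only, not part of the commit; the flat variables stand in for the real per-CPU state and the helper names are hypothetical):

#include <assert.h>
#include <stdio.h>

/*
 * Sketch of the irq_count invariant: irq_count starts at -1, and the
 * IRQ stack is in use exactly when irq_count != -1.  The counter is
 * bumped *before* the stack switch and dropped *after* switching back,
 * so an NMI landing mid-sequence sees the stack as already claimed.
 */
static int irq_count = -1;              /* per-CPU in the real kernel */
static long rsp = 0x1000;               /* stand-in for the task stack */
static const long irq_stack_ptr = 0x2000;

static long enter_irq_stack(void)
{
	long old_rsp = rsp;             /* movq %rsp, \old_rsp */
	if (++irq_count == 0)           /* incl sets ZF on -1 -> 0 ... */
		rsp = irq_stack_ptr;    /* ... cmovzq switches stacks */
	return old_rsp;                 /* the macro pushes this */
}

static void leave_irq_stack(long old_rsp)
{
	rsp = old_rsp;                  /* popq %rsp: off the stack first */
	irq_count--;                    /* ... then release the claim */
}

int main(void)
{
	long outer = enter_irq_stack(); /* outermost entry: switches */
	assert(rsp == irq_stack_ptr);

	long nested = enter_irq_stack(); /* nested entry: no re-switch */
	assert(rsp == irq_stack_ptr);

	leave_irq_stack(nested);
	assert(rsp == irq_stack_ptr);   /* still on the IRQ stack */

	leave_irq_stack(outer);
	assert(rsp == 0x1000 && irq_count == -1);
	puts("irq_count invariant holds across nesting");
	return 0;
}

The ordering matches the assembly below: the claim (increment) happens before the stack switch on entry, and the switch back happens before the release (decrement) on exit, so there is no window in which a nested entry could observe a half-configured IRQ stack.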
1 parent cb8c65c commit 1d3e53e

File tree

3 files changed: +64 -26 lines changed

arch/x86/Kconfig.debug

Lines changed: 0 additions & 2 deletions
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
 	  Some of these sanity checks may slow down kernel entries and
 	  exits or otherwise impact performance.
 
-	  This is currently used to help test NMI code.
-
 	  If unsure, say N.
 
 config DEBUG_NMI_SELFTEST

arch/x86/entry/entry_64.S

Lines changed: 61 additions & 24 deletions
@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
 	.endr
 END(irq_entries_start)
 
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+	pushfq
+	testl	$X86_EFLAGS_IF, (%rsp)
+	jz	.Lokay_\@
+	ud2
+.Lokay_\@:
+	addq	$8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	movq	%rsp, \old_rsp
+	incl	PER_CPU_VAR(irq_count)
+
+	/*
+	 * Right now, if we just incremented irq_count to zero, we've
+	 * claimed the IRQ stack but we haven't switched to it yet.
+	 *
+	 * If anything is added that can interrupt us here without using IST,
+	 * it must be *extremely* careful to limit its stack usage.  This
+	 * could include kprobes and a hypothetical future IST-less #DB
+	 * handler.
+	 */
+
+	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+	pushq	\old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+
+	/*
+	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+	 * the irq stack but we're not on it.
+	 */
+
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
 /*
  * Interrupt entry/exit.
  *
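A note on DEBUG_ENTRY_ASSERT_IRQS_OFF in the hunk above: it saves RFLAGS with pushfq, tests the IF bit (X86_EFLAGS_IF, bit 9) in the saved copy, and hits ud2 if interrupts are still enabled. A rough userspace analogue of the flags test, as a sketch assuming x86-64 with GCC/Clang inline asm (in userspace IF always reads as set, so the first branch is taken):

#include <stdint.h>
#include <stdio.h>

#define X86_EFLAGS_IF 0x200UL	/* Interrupt Flag, bit 9 of RFLAGS */

/* Read RFLAGS the same way the macro does: pushfq, then pop it back. */
static inline uint64_t read_rflags(void)
{
	uint64_t flags;
	__asm__ volatile("pushfq\n\tpopq %0" : "=r"(flags));
	return flags;
}

int main(void)
{
	if (read_rflags() & X86_EFLAGS_IF)
		puts("IF set: the kernel macro would hit ud2 here");
	else
		puts("IF clear: assertion passes");
	return 0;
}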
@@ -485,17 +538,7 @@ END(irq_entries_start)
 	CALL_enter_from_user_mode
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -515,10 +558,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -891,12 +932,10 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp			/* frame pointer backlink */
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	__do_softirq
+	LEAVE_IRQ_STACK
 	leaveq
-	decl	PER_CPU_VAR(irq_count)
 	ret
 END(do_softirq_own_stack)
 
@@ -923,13 +962,11 @@ ENTRY(xen_do_hypervisor_callback)	/* do_hypervisor_callback(struct *pt_regs) */
 	 * see the correct pointer to the pt_regs
 	 */
 	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp				/* frame pointer backlink */
+
+	ENTER_IRQ_STACK old_rsp=%r10
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
+
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif

arch/x86/kernel/process_64.c

Lines changed: 3 additions & 0 deletions
@@ -279,6 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned prev_fsindex, prev_gsindex;
 
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+		     this_cpu_read(irq_count) != -1);
+
 	switch_fpu_prepare(prev_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
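For context on the new check: irq_count is a per-CPU counter that starts at -1. Its definition lives elsewhere in arch/x86 and is not touched by this diff; the line below is an assumption about its shape, shown for context only:

/* Assumed shape of the counter's definition (not part of this commit): */
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;

Since scheduling from the IRQ stack is never legal, __switch_to() must always observe the stack as free; with CONFIG_DEBUG_ENTRY enabled, the WARN_ON_ONCE() fires if a context switch ever begins while this CPU's IRQ stack is still claimed.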
