Skip to content

Commit 85063fa

Browse files
amluto authored and Ingo Molnar committed
x86/entry/64: Clean up and document espfix64 stack setup
The espfix64 setup code was a bit inscrutable and contained an unnecessary push of RAX. Remove that push, update all the stack offsets to match, and document the whole mess.

Reported-By: Borislav Petkov <bp@alien8.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/e5459eb10cf1175c8b36b840bc425f210d045f35.1473717910.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 1ef0199 commit 85063fa

File tree

1 file changed

+53
-11
lines changed

1 file changed

+53
-11
lines changed

arch/x86/entry/entry_64.S

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -586,27 +586,69 @@ native_irq_return_iret:
586586

587587
#ifdef CONFIG_X86_ESPFIX64
588588
native_irq_return_ldt:
589-
pushq %rax
590-
pushq %rdi
589+
/*
590+
* We are running with user GSBASE. All GPRs contain their user
591+
* values. We have a percpu ESPFIX stack that is eight slots
592+
* long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom
593+
* of the ESPFIX stack.
594+
*
595+
* We clobber RAX and RDI in this code. We stash RDI on the
596+
* normal stack and RAX on the ESPFIX stack.
597+
*
598+
* The ESPFIX stack layout we set up looks like this:
599+
*
600+
* --- top of ESPFIX stack ---
601+
* SS
602+
* RSP
603+
* RFLAGS
604+
* CS
605+
* RIP <-- RSP points here when we're done
606+
* RAX <-- espfix_waddr points here
607+
* --- bottom of ESPFIX stack ---
608+
*/
609+
610+
pushq %rdi /* Stash user RDI */
591611
SWAPGS
592612
movq PER_CPU_VAR(espfix_waddr), %rdi
593-
movq %rax, (0*8)(%rdi) /* RAX */
594-
movq (2*8)(%rsp), %rax /* RIP */
613+
movq %rax, (0*8)(%rdi) /* user RAX */
614+
movq (1*8)(%rsp), %rax /* user RIP */
595615
movq %rax, (1*8)(%rdi)
596-
movq (3*8)(%rsp), %rax /* CS */
616+
movq (2*8)(%rsp), %rax /* user CS */
597617
movq %rax, (2*8)(%rdi)
598-
movq (4*8)(%rsp), %rax /* RFLAGS */
618+
movq (3*8)(%rsp), %rax /* user RFLAGS */
599619
movq %rax, (3*8)(%rdi)
600-
movq (6*8)(%rsp), %rax /* SS */
620+
movq (5*8)(%rsp), %rax /* user SS */
601621
movq %rax, (5*8)(%rdi)
602-
movq (5*8)(%rsp), %rax /* RSP */
622+
movq (4*8)(%rsp), %rax /* user RSP */
603623
movq %rax, (4*8)(%rdi)
604-
andl $0xffff0000, %eax
605-
popq %rdi
624+
/* Now RAX == user RSP. */
625+
626+
andl $0xffff0000, %eax /* RAX = (user RSP & 0xffff0000) */
627+
popq %rdi /* Restore user RDI */
628+
629+
/*
630+
* espfix_stack[31:16] == 0. The page tables are set up such that
631+
* (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
632+
* espfix_waddr for any X. That is, there are 65536 RO aliases of
633+
* the same page. Set up RSP so that RSP[31:16] contains the
634+
* respective 16 bits of the /userspace/ RSP and RSP nonetheless
635+
* still points to an RO alias of the ESPFIX stack.
636+
*/
606637
orq PER_CPU_VAR(espfix_stack), %rax
607638
SWAPGS
608639
movq %rax, %rsp
609-
popq %rax
640+
641+
/*
642+
* At this point, we cannot write to the stack any more, but we can
643+
* still read.
644+
*/
645+
popq %rax /* Restore user RAX */
646+
647+
/*
648+
* RSP now points to an ordinary IRET frame, except that the page
649+
* is read-only and RSP[31:16] are preloaded with the userspace
650+
* values. We can now IRET back to userspace.
651+
*/
610652
jmp native_irq_return_iret
611653
#endif
612654
END(common_interrupt)

0 commit comments

Comments
 (0)