Skip to content

Commit 2ec6797

Browse files
amluto (Andy Lutomirski) authored and Ingo Molnar
committed
x86/entry/64/compat: Remove most of the fast system call machinery
We now have only one code path that calls through the compat syscall table. This will make it much more pleasant to change the pt_regs vs register calling convention, which we need to do to move the call into C. Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/320cda5573cefdc601b955d23fbe8f36c085432d.1444091584.git.luto@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent c5f638a commit 2ec6797

File tree

1 file changed

+4
-242
lines changed

1 file changed

+4
-242
lines changed

arch/x86/entry/entry_64_compat.S

Lines changed: 4 additions & 242 deletions
Original file line numberDiff line numberDiff line change
@@ -101,119 +101,13 @@ sysenter_flags_fixed:
101101
movl $11, %edi
102102
call do_exit
103103

104-
/*
105-
* Re-enable interrupts. IRQ tracing already thinks that IRQs are
106-
* on (since we treat user mode as having IRQs on), and the
107-
* prologue above is too short for it to be worth adding a
108-
* tracing round trip.
109-
*/
110-
ENABLE_INTERRUPTS(CLBR_NONE)
111-
112-
/*
113-
* No need to do an access_ok() check here because RBP has been
114-
* 32-bit zero extended:
115-
*/
116-
ASM_STAC
117-
1: movl (%rbp), %ebp
118-
_ASM_EXTABLE(1b, ia32_badarg)
119-
ASM_CLAC
120-
121-
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
122-
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
123-
jnz sysenter_tracesys
124-
125-
sysenter_do_call:
126-
/* 32-bit syscall -> 64-bit C ABI argument conversion */
127-
movl %edi, %r8d /* arg5 */
128-
movl %ebp, %r9d /* arg6 */
129-
xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
130-
movl %ebx, %edi /* arg1 */
131-
movl %edx, %edx /* arg3 (zero extension) */
132-
sysenter_dispatch:
133-
cmpq $(IA32_NR_syscalls-1), %rax
134-
ja 1f
135-
call *ia32_sys_call_table(, %rax, 8)
136-
movq %rax, RAX(%rsp)
137-
1:
138-
DISABLE_INTERRUPTS(CLBR_NONE)
139-
TRACE_IRQS_OFF
140-
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
141-
jnz ia32_ret_from_sys_call_irqs_off
142-
sysexit_from_sys_call:
143-
/*
144-
* NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
145-
* NMI between STI and SYSEXIT has poorly specified behavior,
146-
* and and NMI followed by an IRQ with usergs is fatal. So
147-
* we just pretend we're using SYSEXIT but we really use
148-
* SYSRETL instead.
149-
*
150-
* This code path is still called 'sysexit' because it pairs
151-
* with 'sysenter' and it uses the SYSENTER calling convention.
152-
*/
153-
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
154-
movl RIP(%rsp), %ecx /* User %eip */
155-
movq RAX(%rsp), %rax
156-
movl RSI(%rsp), %esi
157-
movl RDI(%rsp), %edi
158-
xorl %edx, %edx /* Do not leak kernel information */
159-
xorq %r8, %r8
160-
xorq %r9, %r9
161-
xorq %r10, %r10
162-
movl EFLAGS(%rsp), %r11d /* User eflags */
163-
TRACE_IRQS_ON
164-
165-
/*
166-
* SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
167-
* since it avoids a dicey window with interrupts enabled.
168-
*/
169-
movl RSP(%rsp), %esp
170-
171-
/*
172-
* USERGS_SYSRET32 does:
173-
* gsbase = user's gs base
174-
* eip = ecx
175-
* rflags = r11
176-
* cs = __USER32_CS
177-
* ss = __USER_DS
178-
*
179-
* The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
180-
*
181-
* pop %ebp
182-
* pop %edx
183-
* pop %ecx
184-
*
185-
* Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
186-
* avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
187-
* address (already known to user code), and R12-R15 are
188-
* callee-saved and therefore don't contain any interesting
189-
* kernel data.
190-
*/
191-
USERGS_SYSRET32
104+
/* Unreachable. */
105+
ud2
192106

193107
sysenter_fix_flags:
194108
pushq $X86_EFLAGS_FIXED
195109
popfq
196110
jmp sysenter_flags_fixed
197-
198-
sysenter_tracesys:
199-
SAVE_EXTRA_REGS
200-
xorl %eax, %eax /* Do not leak kernel information */
201-
movq %rax, R11(%rsp)
202-
movq %rax, R10(%rsp)
203-
movq %rax, R9(%rsp)
204-
movq %rax, R8(%rsp)
205-
movq %rsp, %rdi /* &pt_regs -> arg1 */
206-
call syscall_trace_enter
207-
208-
/* Reload arg registers from stack. (see sysenter_tracesys) */
209-
movl RCX(%rsp), %ecx
210-
movl RDX(%rsp), %edx
211-
movl RSI(%rsp), %esi
212-
movl RDI(%rsp), %edi
213-
movl %eax, %eax /* zero extension */
214-
215-
RESTORE_EXTRA_REGS
216-
jmp sysenter_do_call
217111
ENDPROC(entry_SYSENTER_compat)
218112

219113
/*
@@ -280,142 +174,10 @@ ENTRY(entry_SYSCALL_compat)
280174
pushq $-ENOSYS /* pt_regs->ax */
281175
sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
282176

283-
/*
284-
* No need to do an access_ok check here because r8 has been
285-
* 32-bit zero extended:
286-
*/
287-
ASM_STAC
288-
1: movl (%r8), %r9d
289-
_ASM_EXTABLE(1b, ia32_badarg)
290-
ASM_CLAC
291-
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
292-
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
293-
jnz cstar_tracesys
294-
295-
cstar_do_call:
296-
/* 32-bit syscall -> 64-bit C ABI argument conversion */
297-
movl %edi, %r8d /* arg5 */
298-
/* r9 already loaded */ /* arg6 */
299-
xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
300-
movl %ebx, %edi /* arg1 */
301-
movl %edx, %edx /* arg3 (zero extension) */
302-
303-
cstar_dispatch:
304-
cmpq $(IA32_NR_syscalls-1), %rax
305-
ja 1f
306-
307-
call *ia32_sys_call_table(, %rax, 8)
308-
movq %rax, RAX(%rsp)
309-
1:
310-
DISABLE_INTERRUPTS(CLBR_NONE)
311-
TRACE_IRQS_OFF
312-
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
313-
jnz ia32_ret_from_sys_call_irqs_off
314-
315-
sysretl_from_sys_call:
316-
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
317-
movl RDX(%rsp), %edx
318-
movl RSI(%rsp), %esi
319-
movl RDI(%rsp), %edi
320-
movl RIP(%rsp), %ecx
321-
movl EFLAGS(%rsp), %r11d
322-
movq RAX(%rsp), %rax
323-
xorq %r10, %r10
324-
xorq %r9, %r9
325-
xorq %r8, %r8
326-
TRACE_IRQS_ON
327-
movl RSP(%rsp), %esp
328-
/*
329-
* 64-bit->32-bit SYSRET restores eip from ecx,
330-
* eflags from r11 (but RF and VM bits are forced to 0),
331-
* cs and ss are loaded from MSRs.
332-
* (Note: 32-bit->32-bit SYSRET is different: since r11
333-
* does not exist, it merely sets eflags.IF=1).
334-
*
335-
* NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
336-
* descriptor is not reinitialized. This means that we must
337-
* avoid SYSRET with SS == NULL, which could happen if we schedule,
338-
* exit the kernel, and re-enter using an interrupt vector. (All
339-
* interrupt entries on x86_64 set SS to NULL.) We prevent that
340-
* from happening by reloading SS in __switch_to.
341-
*/
342-
USERGS_SYSRET32
343-
344-
cstar_tracesys:
345-
xchgl %r9d, %ebp
346-
SAVE_EXTRA_REGS
347-
xorl %eax, %eax /* Do not leak kernel information */
348-
movq %rax, R11(%rsp)
349-
movq %rax, R10(%rsp)
350-
movq %r9, R9(%rsp)
351-
movq %rax, R8(%rsp)
352-
movq %rsp, %rdi /* &pt_regs -> arg1 */
353-
call syscall_trace_enter
354-
movl R9(%rsp), %r9d
355-
356-
/* Reload arg registers from stack. (see sysenter_tracesys) */
357-
movl RCX(%rsp), %ecx
358-
movl RDX(%rsp), %edx
359-
movl RSI(%rsp), %esi
360-
movl RDI(%rsp), %edi
361-
movl %eax, %eax /* zero extension */
362-
363-
RESTORE_EXTRA_REGS
364-
xchgl %ebp, %r9d
365-
jmp cstar_do_call
177+
/* Unreachable. */
178+
ud2
366179
END(entry_SYSCALL_compat)
367180

368-
ia32_badarg:
369-
/*
370-
* So far, we've entered kernel mode, set AC, turned on IRQs, and
371-
* saved C regs except r8-r11. We haven't done any of the other
372-
* standard entry work, though. We want to bail, but we shouldn't
373-
* treat this as a syscall entry since we don't even know what the
374-
* args are. Instead, treat this as a non-syscall entry, finish
375-
* the entry work, and immediately exit after setting AX = -EFAULT.
376-
*
377-
* We're really just being polite here. Killing the task outright
378-
* would be a reasonable action, too. Given that the only valid
379-
* way to have gotten here is through the vDSO, and we already know
380-
* that the stack pointer is bad, the task isn't going to survive
381-
* for long no matter what we do.
382-
*/
383-
384-
ASM_CLAC /* undo STAC */
385-
movq $-EFAULT, RAX(%rsp) /* return -EFAULT if possible */
386-
387-
/* Fill in the rest of pt_regs */
388-
xorl %eax, %eax
389-
movq %rax, R11(%rsp)
390-
movq %rax, R10(%rsp)
391-
movq %rax, R9(%rsp)
392-
movq %rax, R8(%rsp)
393-
SAVE_EXTRA_REGS
394-
395-
/* Turn IRQs back off. */
396-
DISABLE_INTERRUPTS(CLBR_NONE)
397-
TRACE_IRQS_OFF
398-
399-
/* Now finish entering normal kernel mode. */
400-
#ifdef CONFIG_CONTEXT_TRACKING
401-
call enter_from_user_mode
402-
#endif
403-
404-
/* And exit again. */
405-
jmp retint_user
406-
407-
ia32_ret_from_sys_call_irqs_off:
408-
TRACE_IRQS_ON
409-
ENABLE_INTERRUPTS(CLBR_NONE)
410-
411-
ia32_ret_from_sys_call:
412-
xorl %eax, %eax /* Do not leak kernel information */
413-
movq %rax, R11(%rsp)
414-
movq %rax, R10(%rsp)
415-
movq %rax, R9(%rsp)
416-
movq %rax, R8(%rsp)
417-
jmp int_ret_from_sys_call
418-
419181
/*
420182
* Emulated IA32 system calls via int 0x80.
421183
*

0 commit comments

Comments
 (0)