19
19
.section .entry.text, "ax"
20
20
21
21
/*
22
- * 32-bit SYSENTER instruction entry.
22
+ * 32-bit SYSENTER entry.
23
23
*
24
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
25
- * IF and VM in rflags are cleared (IOW: interrupts are off).
24
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
25
+ * on 64-bit kernels running on Intel CPUs.
26
+ *
27
+ * The SYSENTER instruction, in principle, should *only* occur in the
28
+ * vDSO. In practice, a small number of Android devices were shipped
29
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
30
+ * never happened in any of Google's Bionic versions -- it only happened
31
+ * in a narrow range of Intel-provided versions.
32
+ *
33
+ * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
34
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
26
35
* SYSENTER does not save anything on the stack,
27
- * and does not save old rip (!!!) and rflags .
36
+ * and does not save old RIP (!!!), RSP, or RFLAGS.
28
37
*
29
38
* Arguments:
30
39
* eax system call number
35
44
* edi arg5
36
45
* ebp user stack
37
46
* 0(%ebp) arg6
38
- *
39
- * This is purely a fast path. For anything complicated we use the int 0x80
40
- * path below. We set up a complete hardware stack frame to share code
41
- * with the int 0x80 path.
42
47
*/
43
48
ENTRY(entry_SYSENTER_compat)
44
49
/* Interrupts are off on entry. */
@@ -131,17 +136,38 @@ GLOBAL(__end_entry_SYSENTER_compat)
131
136
ENDPROC(entry_SYSENTER_compat)
132
137
133
138
/*
134
- * 32-bit SYSCALL instruction entry.
139
+ * 32-bit SYSCALL entry.
140
+ *
141
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
142
+ * on 64-bit kernels running on AMD CPUs.
143
+ *
144
+ * The SYSCALL instruction, in principle, should *only* occur in the
145
+ * vDSO. In practice, it appears that this really is the case.
146
+ * As evidence:
147
+ *
148
+ * - The calling convention for SYSCALL has changed several times without
149
+ * anyone noticing.
150
+ *
151
+ * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, any
152
+ * user task that did SYSCALL without immediately reloading SS
153
+ * would randomly crash.
135
154
*
136
- * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
137
- * then loads new ss, cs, and rip from previously programmed MSRs.
138
- * rflags gets masked by a value from another MSR (so CLD and CLAC
139
- * are not needed). SYSCALL does not save anything on the stack
140
- * and does not change rsp.
155
+ * - Most programmers do not directly target AMD CPUs, and the 32-bit
156
+ * SYSCALL instruction does not exist on Intel CPUs. Even on AMD
157
+ * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
158
+ * because the SYSCALL instruction in legacy/native 32-bit mode (as
159
+ * opposed to compat mode) is sufficiently poorly designed as to be
160
+ * essentially unusable.
141
161
*
142
- * Note: rflags saving+masking-with-MSR happens only in Long mode
162
+ * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
163
+ * RFLAGS to R11, then loads new SS, CS, and RIP from previously
164
+ * programmed MSRs. RFLAGS gets masked by a value from another MSR
165
+ * (so CLD and CLAC are not needed). SYSCALL does not save anything on
166
+ * the stack and does not change RSP.
167
+ *
168
+ * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
143
169
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
144
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
170
+ * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
145
171
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
146
172
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
147
173
*
@@ -241,7 +267,21 @@ sysret32_from_system_call:
241
267
END(entry_SYSCALL_compat)
242
268
243
269
/*
244
- * Emulated IA32 system calls via int 0x80.
270
+ * 32-bit legacy system call entry.
271
+ *
272
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
273
+ * instruction. INT $0x80 lands here.
274
+ *
275
+ * This entry point can be used by 32-bit and 64-bit programs to perform
276
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
277
+ * various programs and libraries. It is also used by the vDSO's
278
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
279
+ * entry method. Restarted 32-bit system calls also fall back to INT
280
+ * $0x80 regardless of what instruction was originally used to do the
281
+ * system call.
282
+ *
283
+ * This is considered a slow path. It is not used by most libc
284
+ * implementations on modern hardware except during process startup.
245
285
*
246
286
* Arguments:
247
287
* eax system call number
@@ -250,17 +290,8 @@ END(entry_SYSCALL_compat)
250
290
* edx arg3
251
291
* esi arg4
252
292
* edi arg5
253
- * ebp arg6 (note: not saved in the stack frame, should not be touched)
254
- *
255
- * Notes:
256
- * Uses the same stack frame as the x86-64 version.
257
- * All registers except eax must be saved (but ptrace may violate that).
258
- * Arguments are zero extended. For system calls that want sign extension and
259
- * take long arguments a wrapper is needed. Most calls can just be called
260
- * directly.
261
- * Assumes it is only called from user space and entered with interrupts off.
293
+ * ebp arg6
262
294
*/
263
-
264
295
ENTRY(entry_INT80_compat)
265
296
/*
266
297
* Interrupts are off on entry.
0 commit comments