
Commit 304bced

Suresh Siddha authored and H. Peter Anvin committed
x86, fpu: use non-lazy fpu restore for processors supporting xsave
The fundamental model of the current Linux kernel is to lazily init and restore the FPU instead of restoring the task state during context switch. This changes that fundamental lazy model to the non-lazy model for processors supporting the xsave feature.

Reasons driving this model change are:

i. Newer processors support optimized state save/restore using xsaveopt and xrstor by tracking the INIT state and MODIFIED state during context-switch. This is faster than modifying the cr0.TS bit, which has serializing semantics.

ii. Newer glibc versions use SSE for some of the optimized copy/clear routines. With certain workloads (like boot, kernel-compilation etc.), the application completes its work within the first 5 task switches, thus taking up to 5 #DNA traps with the kernel not getting a chance to apply the above mentioned pre-load heuristic.

iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit and thus will not work correctly in the presence of lazy restore. Non-lazy state restore is needed for enabling such features.

Some data on a two socket SNB system:

* Saved 20K DNA exceptions during boot on a two socket SNB system.
* Saved 50K DNA exceptions during kernel-compilation workload.
* Improved throughput of the AVX based checksumming function inside the kernel by ~15%, as xsave/xrstor is faster than the serializing clts/stts pair.

Also, kernel_fpu_begin/end() now relies on the patched alternative instructions, so move check_fpu(), which uses kernel_fpu_begin/end(), after alternative_instructions().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.siddha@intel.com
Merge 32-bit boot fix from,
Link: http://lkml.kernel.org/r/1347300665-6209-4-git-send-email-suresh.b.siddha@intel.com
Cc: Jim Kukunas <james.t.kukunas@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
1 parent 9c6ff8b commit 304bced
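
The heart of the change is the pre-load decision in switch_fpu_prepare() (see the fpu-internal.h hunk below). The following is a minimal, self-contained user-space sketch of that decision only; the function and variable names are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

/*
 * Sketch of the pre-load heuristic described in the commit message.  On
 * xsave-capable processors the next task's FPU state is restored on every
 * context switch (non-lazy).  Otherwise the old heuristic applies: restore
 * eagerly only after more than 5 consecutive math-using switches, and rely
 * on the #DNA (device-not-available) trap the rest of the time.
 */
static bool fpu_preload(bool task_used_math, bool cpu_has_xsave,
			unsigned int fpu_counter)
{
	if (!task_used_math)
		return false;		/* no FPU state to restore */
	if (cpu_has_xsave)
		return true;		/* non-lazy: restore on every switch */
	return fpu_counter > 5;		/* lazy: heuristic pre-load */
}

int main(void)
{
	printf("xsave cpu, fresh task:  %d\n", fpu_preload(true, true, 0));
	printf("legacy cpu, fresh task: %d\n", fpu_preload(true, false, 0));
	printf("legacy cpu, hot task:   %d\n", fpu_preload(true, false, 6));
	return 0;
}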

10 files changed, 146 insertions(+), 61 deletions(-)

arch/x86/include/asm/fpu-internal.h

Lines changed: 63 additions & 33 deletions
@@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
 	__thread_clear_has_fpu(tsk);
-	stts();
+	if (!use_xsave())
+		stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	clts();
+	if (!use_xsave())
+		clts();
 	__thread_set_has_fpu(tsk);
 }
 
+static inline void __drop_fpu(struct task_struct *tsk)
+{
+	if (__thread_has_fpu(tsk)) {
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		__thread_fpu_end(tsk);
+	}
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+	/*
+	 * Forget coprocessor state..
+	 */
+	preempt_disable();
+	tsk->fpu_counter = 0;
+	__drop_fpu(tsk);
+	clear_used_math();
+	preempt_enable();
+}
+
+static inline void drop_init_fpu(struct task_struct *tsk)
+{
+	if (!use_xsave())
+		drop_fpu(tsk);
+	else
+		xrstor_state(init_xstate_buf, -1);
+}
+
 /*
  * FPU state switching for scheduling.
  *
@@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 {
 	fpu_switch_t fpu;
 
-	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	/*
+	 * If the task has used the math, pre-load the FPU on xsave processors
+	 * or if the past 5 consecutive context-switches used math.
+	 */
+	fpu.preload = tsk_used_math(new) && (use_xsave() ||
+					     new->fpu_counter > 5);
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
 			cpu = ~0;
@@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 			new->fpu_counter++;
 			__thread_set_has_fpu(new);
 			prefetch(new->thread.fpu.state);
-		} else
+		} else if (!use_xsave())
 			stts();
 	} else {
 		old->fpu_counter = 0;
 		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (fpu_lazy_restore(new, cpu))
+			if (!use_xsave() && fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -372,7 +410,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
 	if (fpu.preload) {
 		if (unlikely(restore_fpu_checking(new)))
-			__thread_fpu_end(new);
+			drop_init_fpu(new);
 	}
 }
 
@@ -400,17 +438,6 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
 	return __restore_xstate_sig(buf, buf_fx, size);
 }
 
-static inline void __drop_fpu(struct task_struct *tsk)
-{
-	if (__thread_has_fpu(tsk)) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(tsk);
-	}
-}
-
 /*
  * Need to be preemption-safe.
  *
@@ -431,24 +458,18 @@ static inline void user_fpu_begin(void)
 static inline void save_init_fpu(struct task_struct *tsk)
 {
 	WARN_ON_ONCE(!__thread_has_fpu(tsk));
+
+	if (use_xsave()) {
+		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+		return;
+	}
+
 	preempt_disable();
 	__save_init_fpu(tsk);
 	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
-static inline void drop_fpu(struct task_struct *tsk)
-{
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	preempt_disable();
-	__drop_fpu(tsk);
-	preempt_enable();
-	clear_used_math();
-}
-
 /*
  * i387 state interaction
  */
@@ -503,12 +524,21 @@ static inline void fpu_free(struct fpu *fpu)
 	}
 }
 
-static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
-	memcpy(dst->state, src->state, xstate_size);
-}
+	if (use_xsave()) {
+		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;
 
-extern void fpu_finit(struct fpu *fpu);
+		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
+		xsave_state(xsave, -1);
+	} else {
+		struct fpu *dfpu = &dst->thread.fpu;
+		struct fpu *sfpu = &src->thread.fpu;
+
+		unlazy_fpu(src);
+		memcpy(dfpu->state, sfpu->state, xstate_size);
+	}
+}
 
 static inline unsigned long
 alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
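
The new drop_init_fpu() above captures the semantic shift: under the lazy model, "dropping" a task's FPU state just sets CR0.TS so the next FPU use traps and loads fresh state, while under the non-lazy model the registers are reloaded immediately with the pristine init state from init_xstate_buf. A conceptual user-space sketch of the two behaviours, with stand-in names that are not kernel code:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct xstate_area { unsigned char bytes[512]; };	/* stand-in xsave area */

static const struct xstate_area init_state;	/* pristine "INIT" contents */
static struct xstate_area fpu_regs;		/* what the registers hold  */
static bool cr0_ts;				/* "FPU not available" flag */

/* Lazy model: make the FPU unusable; a later #DNA trap loads fresh state. */
static void drop_fpu_lazy(void)
{
	cr0_ts = true;
}

/* Non-lazy model: load the init state right away; CR0.TS stays clear. */
static void drop_fpu_eager(void)
{
	memcpy(&fpu_regs, &init_state, sizeof(fpu_regs));
}

int main(void)
{
	drop_fpu_lazy();
	drop_fpu_eager();
	printf("TS=%d, regs zeroed=%d\n", cr0_ts, fpu_regs.bytes[0] == 0);
	return 0;
}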

arch/x86/include/asm/i387.h

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ struct pt_regs;
 struct user_i387_struct;
 
 extern int init_fpu(struct task_struct *child);
+extern void fpu_finit(struct fpu *fpu);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 extern void math_state_restore(void);

arch/x86/include/asm/xsave.h

Lines changed: 1 addition & 0 deletions
@@ -34,6 +34,7 @@
 extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+extern struct xsave_struct *init_xstate_buf;
 
 extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);

arch/x86/kernel/cpu/bugs.c

Lines changed: 6 additions & 1 deletion
@@ -165,10 +165,15 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_fpu();
 	check_hlt();
 	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
+
+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }

arch/x86/kernel/i387.c

Lines changed: 17 additions & 3 deletions
@@ -22,14 +22,25 @@
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
- * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * For now, on xsave platforms we will return interrupted
+ * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
+ * for xsave platforms, ideally we can change the return value
+ * to something like __thread_has_fpu(current). But we need to
+ * be careful of doing __thread_clear_has_fpu() before saving
+ * the FPU etc for supporting nested uses etc. For now, take
+ * the simple route!
+ *
+ * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
  * pair does nothing at all: the thread must not have fpu (so
  * that we don't try to save the FPU state), and TS must
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
+	if (use_xsave())
+		return 0;
+
 	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
 }
@@ -73,7 +84,7 @@ void kernel_fpu_begin(void)
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else {
+	} else if (!use_xsave()) {
 		this_cpu_write(fpu_owner_task, NULL);
 		clts();
 	}
@@ -82,7 +93,10 @@ EXPORT_SYMBOL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
-	stts();
+	if (use_xsave())
+		math_state_restore();
+	else
+		stts();
 	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_fpu_end);
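
For code outside the context-switch path, the caller-side contract of kernel_fpu_begin()/kernel_fpu_end() is unchanged, but under the non-lazy model kernel_fpu_end() now restores the interrupted task's state immediately via math_state_restore() instead of merely setting CR0.TS. A kernel-style usage sketch follows; the example function and its body are hypothetical, only kernel_fpu_begin/end are the real entry points touched above:

#include <linux/kernel.h>
#include <asm/i387.h>

/* Hypothetical caller: kernel code that touches SSE/AVX registers must
 * bracket that use with kernel_fpu_begin()/kernel_fpu_end(). */
static void example_avx_work(void)
{
	kernel_fpu_begin();	/* save the current task's live FPU state */
	/* ... SSE/AVX accelerated work would run here ... */
	kernel_fpu_end();	/* non-lazy: user FPU state is reloaded now */
}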

arch/x86/kernel/process.c

Lines changed: 8 additions & 4 deletions
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	int ret;
 
-	unlazy_fpu(src);
-
 	*dst = *src;
 	if (fpu_allocated(&src->thread.fpu)) {
 		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
 		ret = fpu_alloc(&dst->thread.fpu);
 		if (ret)
 			return ret;
-		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+		fpu_copy(dst, src);
 	}
 	return 0;
 }
@@ -153,7 +151,13 @@ void flush_thread(void)
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_fpu(tsk);
+	drop_init_fpu(tsk);
+	/*
+	 * Free the FPU state for non xsave platforms. They get reallocated
+	 * lazily at the first use.
+	 */
+	if (!use_xsave())
+		free_thread_xstate(tsk);
 }
 
 static void hard_disable_TSC(void)

arch/x86/kernel/process_32.c

Lines changed: 0 additions & 4 deletions
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs = __USER_CS;
 	regs->ip = new_ip;
 	regs->sp = new_sp;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);

arch/x86/kernel/process_64.c

Lines changed: 0 additions & 4 deletions
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	regs->cs = _cs;
 	regs->ss = _ss;
 	regs->flags = X86_EFLAGS_IF;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 
 void

arch/x86/kernel/traps.c

Lines changed: 4 additions & 1 deletion
@@ -613,11 +613,12 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
+
 	/*
	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
+		drop_init_fpu(tsk);
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
@@ -629,6 +630,8 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	BUG_ON(use_xsave());
+
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
