Skip to content

Commit c5bedc6

Browse files
author
Ingo Molnar
committed
x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active
Introduce a simple fpu->fpstate_active flag in the fpu context data structure and use that instead of PF_USED_MATH in task->flags.

Testing for this flag byte should be slightly more efficient than testing a bit in a bitmask, but the main advantage is that most FPU functions can now be performed on a 'struct fpu' alone, they don't need access to 'struct task_struct' anymore.

There's a slight linecount increase, mostly due to the 'fpu' local variables and due to extra comments. The local variables will go away once we move most of the FPU methods to pure 'struct fpu' parameters.

Reviewed-by: Borislav Petkov <bp@alien8.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent af7f872 commit c5bedc6

File tree

9 files changed

+54
-28
lines changed

9 files changed

+54
-28
lines changed

arch/x86/ia32/ia32_signal.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
307307
size_t frame_size,
308308
void __user **fpstate)
309309
{
310+
struct fpu *fpu = &current->thread.fpu;
310311
unsigned long sp;
311312

312313
/* Default to using normal stack */
@@ -321,7 +322,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
321322
ksig->ka.sa.sa_restorer)
322323
sp = (unsigned long) ksig->ka.sa.sa_restorer;
323324

324-
if (current->flags & PF_USED_MATH) {
325+
if (fpu->fpstate_active) {
325326
unsigned long fx_aligned, math_size;
326327

327328
sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);

arch/x86/include/asm/fpu-internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ static inline void drop_fpu(struct task_struct *tsk)
375375
__thread_fpu_end(fpu);
376376
}
377377

378-
tsk->flags &= ~PF_USED_MATH;
378+
fpu->fpstate_active = 0;
379379

380380
preempt_enable();
381381
}
@@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
424424
* If the task has used the math, pre-load the FPU on xsave processors
425425
* or if the past 5 consecutive context-switches used math.
426426
*/
427-
fpu.preload = (new->flags & PF_USED_MATH) &&
427+
fpu.preload = new_fpu->fpstate_active &&
428428
(use_eager_fpu() || new->thread.fpu.counter > 5);
429429

430430
if (old_fpu->has_fpu) {

arch/x86/include/asm/fpu/types.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ struct fpu {
137137
* deal with bursty apps that only use the FPU for a short time:
138138
*/
139139
unsigned char counter;
140+
/*
141+
* This flag indicates whether this context is fpstate_active: if the task is
142+
* not running then we can restore from this context, if the task
143+
* is running then we should save into this context.
144+
*/
145+
unsigned char fpstate_active;
140146
};
141147

142148
#endif /* _ASM_X86_FPU_H */

arch/x86/include/asm/processor.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,10 @@ struct thread_struct {
385385
unsigned long fs;
386386
#endif
387387
unsigned long gs;
388+
389+
/* Floating point and extended processor state */
390+
struct fpu fpu;
391+
388392
/* Save middle states of ptrace breakpoints */
389393
struct perf_event *ptrace_bps[HBP_NUM];
390394
/* Debug status used for traps, single steps, etc... */
@@ -395,8 +399,6 @@ struct thread_struct {
395399
unsigned long cr2;
396400
unsigned long trap_nr;
397401
unsigned long error_code;
398-
/* floating point and extended processor state */
399-
struct fpu fpu;
400402
#ifdef CONFIG_X86_32
401403
/* Virtual 86 mode info */
402404
struct vm86_struct __user *vm86_info;

arch/x86/kernel/fpu/core.c

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -236,14 +236,17 @@ static void fpu_copy(struct task_struct *dst, struct task_struct *src)
236236

237237
int fpu__copy(struct task_struct *dst, struct task_struct *src)
238238
{
239+
struct fpu *dst_fpu = &dst->thread.fpu;
240+
struct fpu *src_fpu = &src->thread.fpu;
241+
239242
dst->thread.fpu.counter = 0;
240243
dst->thread.fpu.has_fpu = 0;
241244
dst->thread.fpu.state = NULL;
242245

243246
task_disable_lazy_fpu_restore(dst);
244247

245-
if (src->flags & PF_USED_MATH) {
246-
int err = fpstate_alloc(&dst->thread.fpu);
248+
if (src_fpu->fpstate_active) {
249+
int err = fpstate_alloc(dst_fpu);
247250

248251
if (err)
249252
return err;
@@ -260,11 +263,12 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
260263
*/
261264
int fpstate_alloc_init(struct task_struct *curr)
262265
{
266+
struct fpu *fpu = &curr->thread.fpu;
263267
int ret;
264268

265269
if (WARN_ON_ONCE(curr != current))
266270
return -EINVAL;
267-
if (WARN_ON_ONCE(curr->flags & PF_USED_MATH))
271+
if (WARN_ON_ONCE(fpu->fpstate_active))
268272
return -EINVAL;
269273

270274
/*
@@ -277,7 +281,7 @@ int fpstate_alloc_init(struct task_struct *curr)
277281
fpstate_init(&curr->thread.fpu);
278282

279283
/* Safe to do for the current task: */
280-
curr->flags |= PF_USED_MATH;
284+
fpu->fpstate_active = 1;
281285

282286
return 0;
283287
}
@@ -308,12 +312,13 @@ EXPORT_SYMBOL_GPL(fpstate_alloc_init);
308312
*/
309313
static int fpu__unlazy_stopped(struct task_struct *child)
310314
{
315+
struct fpu *child_fpu = &child->thread.fpu;
311316
int ret;
312317

313318
if (WARN_ON_ONCE(child == current))
314319
return -EINVAL;
315320

316-
if (child->flags & PF_USED_MATH) {
321+
if (child_fpu->fpstate_active) {
317322
task_disable_lazy_fpu_restore(child);
318323
return 0;
319324
}
@@ -328,7 +333,7 @@ static int fpu__unlazy_stopped(struct task_struct *child)
328333
fpstate_init(&child->thread.fpu);
329334

330335
/* Safe to do for stopped child tasks: */
331-
child->flags |= PF_USED_MATH;
336+
child_fpu->fpstate_active = 1;
332337

333338
return 0;
334339
}
@@ -348,7 +353,7 @@ void fpu__restore(void)
348353
struct task_struct *tsk = current;
349354
struct fpu *fpu = &tsk->thread.fpu;
350355

351-
if (!(tsk->flags & PF_USED_MATH)) {
356+
if (!fpu->fpstate_active) {
352357
local_irq_enable();
353358
/*
354359
* does a slab alloc which can sleep
@@ -378,14 +383,16 @@ EXPORT_SYMBOL_GPL(fpu__restore);
378383

379384
void fpu__flush_thread(struct task_struct *tsk)
380385
{
386+
struct fpu *fpu = &tsk->thread.fpu;
387+
381388
WARN_ON(tsk != current);
382389

383390
if (!use_eager_fpu()) {
384391
/* FPU state will be reallocated lazily at the first use. */
385392
drop_fpu(tsk);
386393
fpstate_free(&tsk->thread.fpu);
387394
} else {
388-
if (!(tsk->flags & PF_USED_MATH)) {
395+
if (!fpu->fpstate_active) {
389396
/* kthread execs. TODO: cleanup this horror. */
390397
if (WARN_ON(fpstate_alloc_init(tsk)))
391398
force_sig(SIGKILL, tsk);
@@ -402,12 +409,16 @@ void fpu__flush_thread(struct task_struct *tsk)
402409
*/
403410
int fpregs_active(struct task_struct *target, const struct user_regset *regset)
404411
{
405-
return (target->flags & PF_USED_MATH) ? regset->n : 0;
412+
struct fpu *target_fpu = &target->thread.fpu;
413+
414+
return target_fpu->fpstate_active ? regset->n : 0;
406415
}
407416

408417
int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
409418
{
410-
return (cpu_has_fxsr && (target->flags & PF_USED_MATH)) ? regset->n : 0;
419+
struct fpu *target_fpu = &target->thread.fpu;
420+
421+
return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
411422
}
412423

413424
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -733,16 +744,17 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
733744
* struct user_i387_struct) but is in fact only used for 32-bit
734745
* dumps, so on 64-bit it is really struct user_i387_ia32_struct.
735746
*/
736-
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
747+
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
737748
{
738749
struct task_struct *tsk = current;
750+
struct fpu *fpu = &tsk->thread.fpu;
739751
int fpvalid;
740752

741-
fpvalid = !!(tsk->flags & PF_USED_MATH);
753+
fpvalid = fpu->fpstate_active;
742754
if (fpvalid)
743755
fpvalid = !fpregs_get(tsk, NULL,
744756
0, sizeof(struct user_i387_ia32_struct),
745-
fpu, NULL);
757+
ufpu, NULL);
746758

747759
return fpvalid;
748760
}

arch/x86/kernel/fpu/xsave.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
334334
{
335335
int ia32_fxstate = (buf != buf_fx);
336336
struct task_struct *tsk = current;
337+
struct fpu *fpu = &tsk->thread.fpu;
337338
int state_size = xstate_size;
338339
u64 xstate_bv = 0;
339340
int fx_only = 0;
@@ -349,7 +350,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
349350
if (!access_ok(VERIFY_READ, buf, size))
350351
return -EACCES;
351352

352-
if (!(tsk->flags & PF_USED_MATH) && fpstate_alloc_init(tsk))
353+
if (!fpu->fpstate_active && fpstate_alloc_init(tsk))
353354
return -1;
354355

355356
if (!static_cpu_has(X86_FEATURE_FPU))
@@ -384,12 +385,12 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
384385
int err = 0;
385386

386387
/*
387-
* Drop the current fpu which clears PF_USED_MATH. This ensures
388+
* Drop the current fpu which clears fpu->fpstate_active. This ensures
388389
* that any context-switch during the copy of the new state,
389390
* avoids the intermediate state from getting restored/saved.
390391
* Thus avoiding the new restored state from getting corrupted.
391392
* We will be ready to restore/save the state only after
392-
* PF_USED_MATH is again set.
393+
* fpu->fpstate_active is again set.
393394
*/
394395
drop_fpu(tsk);
395396

@@ -401,7 +402,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
401402
sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
402403
}
403404

404-
tsk->flags |= PF_USED_MATH;
405+
fpu->fpstate_active = 1;
405406
if (use_eager_fpu()) {
406407
preempt_disable();
407408
fpu__restore();
@@ -685,7 +686,7 @@ void xsave_init(void)
685686
*/
686687
void __init_refok eager_fpu_init(void)
687688
{
688-
WARN_ON(current->flags & PF_USED_MATH);
689+
WARN_ON(current->thread.fpu.fpstate_active);
689690
current_thread_info()->status = 0;
690691

691692
if (eagerfpu == ENABLE)

arch/x86/kernel/signal.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
198198
unsigned long sp = regs->sp;
199199
unsigned long buf_fx = 0;
200200
int onsigstack = on_sig_stack(sp);
201+
struct fpu *fpu = &current->thread.fpu;
201202

202203
/* redzone */
203204
if (config_enabled(CONFIG_X86_64))
@@ -217,7 +218,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
217218
}
218219
}
219220

220-
if (current->flags & PF_USED_MATH) {
221+
if (fpu->fpstate_active) {
221222
sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
222223
&buf_fx, &math_size);
223224
*fpstate = (void __user *)sp;
@@ -233,7 +234,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
233234
return (void __user *)-1L;
234235

235236
/* save i387 and extended state */
236-
if ((current->flags & PF_USED_MATH) &&
237+
if (fpu->fpstate_active &&
237238
save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
238239
return (void __user *)-1L;
239240

@@ -616,6 +617,7 @@ static void
616617
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
617618
{
618619
bool stepping, failed;
620+
struct fpu *fpu = &current->thread.fpu;
619621

620622
/* Are we from a system call? */
621623
if (syscall_get_nr(current, regs) >= 0) {
@@ -664,7 +666,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
664666
/*
665667
* Ensure the signal handler starts with the new fpu state.
666668
*/
667-
if (current->flags & PF_USED_MATH)
669+
if (fpu->fpstate_active)
668670
fpu_reset_state(current);
669671
}
670672
signal_setup_done(failed, ksig, stepping);

arch/x86/kvm/x86.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6597,10 +6597,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
65976597

65986598
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
65996599
{
6600+
struct fpu *fpu = &current->thread.fpu;
66006601
int r;
66016602
sigset_t sigsaved;
66026603

6603-
if (!(current->flags & PF_USED_MATH) && fpstate_alloc_init(current))
6604+
if (!fpu->fpstate_active && fpstate_alloc_init(current))
66046605
return -ENOMEM;
66056606

66066607
if (vcpu->sigset_active)

arch/x86/math-emu/fpu_entry.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,9 @@ void math_emulate(struct math_emu_info *info)
147147
unsigned long code_base = 0;
148148
unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
149149
struct desc_struct code_descriptor;
150+
struct fpu *fpu = &current->thread.fpu;
150151

151-
if (!(current->flags & PF_USED_MATH)) {
152+
if (!fpu->fpstate_active) {
152153
if (fpstate_alloc_init(current)) {
153154
do_group_exit(SIGKILL);
154155
return;

0 commit comments

Comments
 (0)