Skip to content

Commit ce4d72f

Browse files
committed
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu changes from Ingo Molnar: "There are two main areas of changes: - Rework of the extended FPU state code to robustify the kernel's usage of cpuid provided xstate sizes - and related changes (Dave Hansen) - math emulation enhancements: new modern FPU instructions support, with testcases, plus cleanups (Denys Vlasenko)" * 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) x86/fpu: Fixup uninitialized feature_name warning x86/fpu/math-emu: Add support for FISTTP instructions x86/fpu/math-emu, selftests: Add test for FISTTP instructions x86/fpu/math-emu: Add support for FCMOVcc insns x86/fpu/math-emu: Add support for F[U]COMI[P] insns x86/fpu/math-emu: Remove define layer for undocumented opcodes x86/fpu/math-emu, selftests: Add tests for FCMOV and FCOMI insns x86/fpu/math-emu: Remove !NO_UNDOC_CODE x86/fpu: Check CPU-provided sizes against struct declarations x86/fpu: Check to ensure increasing-offset xstate offsets x86/fpu: Correct and check XSAVE xstate size calculations x86/fpu: Add C structures for AVX-512 state components x86/fpu: Rework YMM definition x86/fpu/mpx: Rework MPX 'xstate' types x86/fpu: Add xfeature_enabled() helper instead of test_bit() x86/fpu: Remove 'xfeature_nr' x86/fpu: Rework XSTATE_* macros to remove magic '2' x86/fpu: Rename XFEATURES_NR_MAX x86/fpu: Rename XSAVE macros x86/fpu: Remove partial LWP support definitions ...
2 parents 0f25f2c + 158ecc3 commit ce4d72f

33 files changed

+1374
-243
lines changed

arch/x86/crypto/camellia_aesni_avx2_glue.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,8 @@ static int __init camellia_aesni_init(void)
567567
return -ENODEV;
568568
}
569569

570-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
570+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
571+
&feature_name)) {
571572
pr_info("CPU feature '%s' is not supported.\n", feature_name);
572573
return -ENODEV;
573574
}

arch/x86/crypto/camellia_aesni_avx_glue.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,8 @@ static int __init camellia_aesni_init(void)
559559
return -ENODEV;
560560
}
561561

562-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
562+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
563+
&feature_name)) {
563564
pr_info("CPU feature '%s' is not supported.\n", feature_name);
564565
return -ENODEV;
565566
}

arch/x86/crypto/cast5_avx_glue.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,8 @@ static int __init cast5_init(void)
469469
{
470470
const char *feature_name;
471471

472-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
472+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
473+
&feature_name)) {
473474
pr_info("CPU feature '%s' is not supported.\n", feature_name);
474475
return -ENODEV;
475476
}

arch/x86/crypto/cast6_avx_glue.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,8 @@ static int __init cast6_init(void)
591591
{
592592
const char *feature_name;
593593

594-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
594+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
595+
&feature_name)) {
595596
pr_info("CPU feature '%s' is not supported.\n", feature_name);
596597
return -ENODEV;
597598
}

arch/x86/crypto/chacha20_glue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ static int __init chacha20_simd_mod_init(void)
130130

131131
#ifdef CONFIG_AS_AVX2
132132
chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
133-
cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
133+
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
134134
#endif
135135
return crypto_register_alg(&alg);
136136
}

arch/x86/crypto/poly1305_glue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ static int __init poly1305_simd_mod_init(void)
184184

185185
#ifdef CONFIG_AS_AVX2
186186
poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
187-
cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
187+
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
188188
alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
189189
if (poly1305_use_avx2)
190190
alg.descsize += 10 * sizeof(u32);

arch/x86/crypto/serpent_avx2_glue.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,8 @@ static int __init init(void)
542542
pr_info("AVX2 instructions are not detected.\n");
543543
return -ENODEV;
544544
}
545-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
545+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
546+
&feature_name)) {
546547
pr_info("CPU feature '%s' is not supported.\n", feature_name);
547548
return -ENODEV;
548549
}

arch/x86/crypto/serpent_avx_glue.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,8 @@ static int __init serpent_init(void)
597597
{
598598
const char *feature_name;
599599

600-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
600+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
601+
&feature_name)) {
601602
pr_info("CPU feature '%s' is not supported.\n", feature_name);
602603
return -ENODEV;
603604
}

arch/x86/crypto/sha1_ssse3_glue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ static struct shash_alg alg = {
121121
#ifdef CONFIG_AS_AVX
122122
static bool __init avx_usable(void)
123123
{
124-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
124+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
125125
if (cpu_has_avx)
126126
pr_info("AVX detected but unusable.\n");
127127
return false;

arch/x86/crypto/sha256_ssse3_glue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ static struct shash_alg algs[] = { {
130130
#ifdef CONFIG_AS_AVX
131131
static bool __init avx_usable(void)
132132
{
133-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
133+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
134134
if (cpu_has_avx)
135135
pr_info("AVX detected but unusable.\n");
136136
return false;

arch/x86/crypto/sha512_ssse3_glue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ static struct shash_alg algs[] = { {
129129
#ifdef CONFIG_AS_AVX
130130
static bool __init avx_usable(void)
131131
{
132-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
132+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
133133
if (cpu_has_avx)
134134
pr_info("AVX detected but unusable.\n");
135135
return false;

arch/x86/crypto/twofish_avx_glue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ static int __init twofish_init(void)
558558
{
559559
const char *feature_name;
560560

561-
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
561+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) {
562562
pr_info("CPU feature '%s' is not supported.\n", feature_name);
563563
return -ENODEV;
564564
}

arch/x86/include/asm/fpu/types.h

Lines changed: 103 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -95,94 +95,152 @@ struct swregs_state {
9595
/*
9696
* List of XSAVE features Linux knows about:
9797
*/
98-
enum xfeature_bit {
99-
XSTATE_BIT_FP,
100-
XSTATE_BIT_SSE,
101-
XSTATE_BIT_YMM,
102-
XSTATE_BIT_BNDREGS,
103-
XSTATE_BIT_BNDCSR,
104-
XSTATE_BIT_OPMASK,
105-
XSTATE_BIT_ZMM_Hi256,
106-
XSTATE_BIT_Hi16_ZMM,
107-
108-
XFEATURES_NR_MAX,
98+
enum xfeature {
99+
XFEATURE_FP,
100+
XFEATURE_SSE,
101+
/*
102+
* Values above here are "legacy states".
103+
* Those below are "extended states".
104+
*/
105+
XFEATURE_YMM,
106+
XFEATURE_BNDREGS,
107+
XFEATURE_BNDCSR,
108+
XFEATURE_OPMASK,
109+
XFEATURE_ZMM_Hi256,
110+
XFEATURE_Hi16_ZMM,
111+
112+
XFEATURE_MAX,
109113
};
110114

111-
#define XSTATE_FP (1 << XSTATE_BIT_FP)
112-
#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
113-
#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
114-
#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
115-
#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
116-
#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
117-
#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
118-
#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
115+
#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
116+
#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
117+
#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
118+
#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
119+
#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
120+
#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
121+
#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
122+
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
123+
124+
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
125+
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
126+
| XFEATURE_MASK_ZMM_Hi256 \
127+
| XFEATURE_MASK_Hi16_ZMM)
128+
129+
#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
119130

120-
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
121-
#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
131+
struct reg_128_bit {
132+
u8 regbytes[128/8];
133+
};
134+
struct reg_256_bit {
135+
u8 regbytes[256/8];
136+
};
137+
struct reg_512_bit {
138+
u8 regbytes[512/8];
139+
};
122140

123141
/*
142+
* State component 2:
143+
*
124144
* There are 16x 256-bit AVX registers named YMM0-YMM15.
125145
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
126-
* and are stored in 'struct fxregs_state::xmm_space[]'.
146+
* and are stored in 'struct fxregs_state::xmm_space[]' in the
147+
* "legacy" area.
127148
*
128-
* The high 128 bits are stored here:
129-
* 16x 128 bits == 256 bytes.
149+
* The high 128 bits are stored here.
130150
*/
131151
struct ymmh_struct {
132-
u8 ymmh_space[256];
133-
};
134-
135-
/* We don't support LWP yet: */
136-
struct lwp_struct {
137-
u8 reserved[128];
138-
};
152+
struct reg_128_bit hi_ymm[16];
153+
} __packed;
139154

140155
/* Intel MPX support: */
141-
struct bndreg {
156+
157+
struct mpx_bndreg {
142158
u64 lower_bound;
143159
u64 upper_bound;
144160
} __packed;
161+
/*
162+
* State component 3 is used for the 4 128-bit bounds registers
163+
*/
164+
struct mpx_bndreg_state {
165+
struct mpx_bndreg bndreg[4];
166+
} __packed;
145167

146-
struct bndcsr {
168+
/*
169+
* State component 4 is used for the 64-bit user-mode MPX
170+
* configuration register BNDCFGU and the 64-bit MPX status
171+
* register BNDSTATUS. We call the pair "BNDCSR".
172+
*/
173+
struct mpx_bndcsr {
147174
u64 bndcfgu;
148175
u64 bndstatus;
149176
} __packed;
150177

151-
struct mpx_struct {
152-
struct bndreg bndreg[4];
153-
struct bndcsr bndcsr;
154-
};
178+
/*
179+
* The BNDCSR state is padded out to be 64-bytes in size.
180+
*/
181+
struct mpx_bndcsr_state {
182+
union {
183+
struct mpx_bndcsr bndcsr;
184+
u8 pad_to_64_bytes[64];
185+
};
186+
} __packed;
187+
188+
/* AVX-512 Components: */
189+
190+
/*
191+
* State component 5 is used for the 8 64-bit opmask registers
192+
* k0-k7 (opmask state).
193+
*/
194+
struct avx_512_opmask_state {
195+
u64 opmask_reg[8];
196+
} __packed;
197+
198+
/*
199+
* State component 6 is used for the upper 256 bits of the
200+
* registers ZMM0-ZMM15. These 16 256-bit values are denoted
201+
* ZMM0_H-ZMM15_H (ZMM_Hi256 state).
202+
*/
203+
struct avx_512_zmm_uppers_state {
204+
struct reg_256_bit zmm_upper[16];
205+
} __packed;
206+
207+
/*
208+
* State component 7 is used for the 16 512-bit registers
209+
* ZMM16-ZMM31 (Hi16_ZMM state).
210+
*/
211+
struct avx_512_hi16_state {
212+
struct reg_512_bit hi16_zmm[16];
213+
} __packed;
155214

156215
struct xstate_header {
157216
u64 xfeatures;
158217
u64 xcomp_bv;
159218
u64 reserved[6];
160219
} __attribute__((packed));
161220

162-
/* New processor state extensions should be added here: */
163-
#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
164-
sizeof(struct lwp_struct) + \
165-
sizeof(struct mpx_struct) )
166221
/*
167222
* This is our most modern FPU state format, as saved by the XSAVE
168223
* and restored by the XRSTOR instructions.
169224
*
170225
* It consists of a legacy fxregs portion, an xstate header and
171-
* subsequent fixed size areas as defined by the xstate header.
172-
* Not all CPUs support all the extensions.
226+
* subsequent areas as defined by the xstate header. Not all CPUs
227+
* support all the extensions, so the size of the extended area
228+
* can vary quite a bit between CPUs.
173229
*/
174230
struct xregs_state {
175231
struct fxregs_state i387;
176232
struct xstate_header header;
177-
u8 __reserved[XSTATE_RESERVE];
233+
u8 extended_state_area[0];
178234
} __attribute__ ((packed, aligned (64)));
179235

180236
/*
181237
* This is a union of all the possible FPU state formats
182238
* put together, so that we can pick the right one runtime.
183239
*
184240
* The size of the structure is determined by the largest
185-
* member - which is the xsave area:
241+
* member - which is the xsave area. The padding is there
242+
* to ensure that statically-allocated task_structs (just
243+
* the init_task today) have enough space.
186244
*/
187245
union fpregs_state {
188246
struct fregs_state fsave;

arch/x86/include/asm/fpu/xstate.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#include <linux/uaccess.h>
77

88
/* Bit 63 of XCR0 is reserved for future expansion */
9-
#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
9+
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
1010

1111
#define XSTATE_CPUID 0x0000000d
1212

@@ -19,14 +19,18 @@
1919
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
2020

2121
/* Supported features which support lazy state saving */
22-
#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
23-
| XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
22+
#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
23+
XFEATURE_MASK_SSE | \
24+
XFEATURE_MASK_YMM | \
25+
XFEATURE_MASK_OPMASK | \
26+
XFEATURE_MASK_ZMM_Hi256 | \
27+
XFEATURE_MASK_Hi16_ZMM)
2428

2529
/* Supported features which require eager state saving */
26-
#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
30+
#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
2731

2832
/* All currently supported features */
29-
#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
33+
#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
3034

3135
#ifdef CONFIG_X86_64
3236
#define REX_PREFIX "0x48, "
@@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
4044

4145
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
4246

47+
void fpu__xstate_clear_all_cpu_caps(void);
4348
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
4449
const void *get_xsave_field_ptr(int xstate_field);
4550

arch/x86/include/asm/trace/mpx.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
TRACE_EVENT(mpx_bounds_register_exception,
1212

1313
TP_PROTO(void *addr_referenced,
14-
const struct bndreg *bndreg),
14+
const struct mpx_bndreg *bndreg),
1515
TP_ARGS(addr_referenced, bndreg),
1616

1717
TP_STRUCT__entry(
@@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception,
4444

4545
TRACE_EVENT(bounds_exception_mpx,
4646

47-
TP_PROTO(const struct bndcsr *bndcsr),
47+
TP_PROTO(const struct mpx_bndcsr *bndcsr),
4848
TP_ARGS(bndcsr),
4949

5050
TP_STRUCT__entry(
@@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table,
116116
/*
117117
* This gets used outside of MPX-specific code, so we need a stub.
118118
*/
119-
static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
119+
static inline
120+
void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
120121
{
121122
}
122123

0 commit comments

Comments
 (0)