Skip to content

Commit 7582e22

Browse files
Dave Martin authored and wildea01 committed
arm64/sve: Backend logic for setting the vector length
This patch implements the core logic for changing a task's vector length on request from userspace. This will be used by the ptrace and prctl frontends that are implemented in later patches.

The SVE architecture permits, but does not require, implementations to support vector lengths that are not a power of two. To handle this, logic is added to check a requested vector length against a possibly sparse bitmap of available vector lengths at runtime, so that the best supported value can be chosen.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
1 parent 8cd969d commit 7582e22

File tree

3 files changed

+149
-1
lines changed

3 files changed

+149
-1
lines changed

arch/arm64/include/asm/fpsimd.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#ifndef __ASSEMBLY__
2222

23+
#include <linux/cache.h>
2324
#include <linux/stddef.h>
2425

2526
/*
@@ -70,17 +71,24 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
7071

7172
extern void fpsimd_flush_task_state(struct task_struct *target);
7273

74+
/* Maximum VL that SVE VL-agnostic software can transparently support */
75+
#define SVE_VL_ARCH_MAX 0x100
76+
7377
extern void sve_save_state(void *state, u32 *pfpsr);
7478
extern void sve_load_state(void const *state, u32 const *pfpsr,
7579
unsigned long vq_minus_1);
7680
extern unsigned int sve_get_vl(void);
7781

82+
extern int __ro_after_init sve_max_vl;
83+
7884
#ifdef CONFIG_ARM64_SVE
7985

8086
extern size_t sve_state_size(struct task_struct const *task);
8187

8288
extern void sve_alloc(struct task_struct *task);
8389
extern void fpsimd_release_task(struct task_struct *task);
90+
extern int sve_set_vector_length(struct task_struct *task,
91+
unsigned long vl, unsigned long flags);
8492

8593
#else /* ! CONFIG_ARM64_SVE */
8694

arch/arm64/kernel/fpsimd.c

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1818
*/
1919

20+
#include <linux/bitmap.h>
2021
#include <linux/bottom_half.h>
2122
#include <linux/bug.h>
23+
#include <linux/cache.h>
2224
#include <linux/compat.h>
2325
#include <linux/cpu.h>
2426
#include <linux/cpu_pm.h>
@@ -28,6 +30,7 @@
2830
#include <linux/init.h>
2931
#include <linux/percpu.h>
3032
#include <linux/preempt.h>
33+
#include <linux/prctl.h>
3134
#include <linux/ptrace.h>
3235
#include <linux/sched/signal.h>
3336
#include <linux/sched/task_stack.h>
@@ -114,6 +117,20 @@ static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
114117
/* Default VL for tasks that don't set it explicitly: */
115118
static int sve_default_vl = SVE_VL_MIN;
116119

120+
#ifdef CONFIG_ARM64_SVE
121+
122+
/* Maximum supported vector length across all CPUs (initially poisoned) */
123+
int __ro_after_init sve_max_vl = -1;
124+
/* Set of available vector lengths, as vq_to_bit(vq): */
125+
static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
126+
127+
#else /* ! CONFIG_ARM64_SVE */
128+
129+
/* Dummy declaration for code that will be optimised out: */
130+
extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
131+
132+
#endif /* ! CONFIG_ARM64_SVE */
133+
117134
/*
118135
* Call __sve_free() directly only if you know task can't be scheduled
119136
* or preempted.
@@ -271,6 +288,50 @@ static void task_fpsimd_save(void)
271288
}
272289
}
273290

291+
/*
292+
* Helpers to translate bit indices in sve_vq_map to VQ values (and
293+
* vice versa). This allows find_next_bit() to be used to find the
294+
* _maximum_ VQ not exceeding a certain value.
295+
*/
296+
297+
/* Map a VQ value to its bit index in sve_vq_map. */
/* The mapping is reversed (index = SVE_VQ_MAX - vq), so a larger VQ gets a smaller index. */
static unsigned int vq_to_bit(unsigned int vq)
298+
{
299+
return SVE_VQ_MAX - vq;
300+
}
301+
302+
/* Map a bit index in sve_vq_map back to its VQ value (inverse of vq_to_bit()). */
/* An index >= SVE_VQ_MAX is treated as a bug: it raises WARN_ON and is clamped */
/* to SVE_VQ_MAX - 1, so the function then returns 1. */
static unsigned int bit_to_vq(unsigned int bit)
303+
{
304+
if (WARN_ON(bit >= SVE_VQ_MAX))
305+
bit = SVE_VQ_MAX - 1;
306+
307+
return SVE_VQ_MAX - bit;
308+
}
309+
310+
/*
311+
* All vector length selection from userspace comes through here.
312+
* We're on a slow path, so some sanity-checks are included.
313+
* If things go wrong there's a bug somewhere, but try to fall back to a
314+
* safe choice.
315+
*/
316+
/* Pick the vector length to use for a requested vl, based on sve_max_vl and sve_vq_map. */
/* Returns a vector length converted back from the VQ of the first set bit found. */
static unsigned int find_supported_vector_length(unsigned int vl)
317+
{
318+
int bit;
319+
int max_vl = sve_max_vl;
320+
321+
/* An invalid request is a caller bug: warn and fall back to SVE_VL_MIN. */
if (WARN_ON(!sve_vl_valid(vl)))
322+
vl = SVE_VL_MIN;
323+
324+
/* Likewise if the recorded system maximum is not a valid vector length. */
if (WARN_ON(!sve_vl_valid(max_vl)))
325+
max_vl = SVE_VL_MIN;
326+
327+
/* Never search above the system-wide maximum. */
if (vl > max_vl)
328+
vl = max_vl;
329+
330+
/* Bit indices grow as VQ shrinks (see vq_to_bit()), so scanning upward from the */
/* requested VQ's bit finds the largest available VQ that does not exceed the request. */
/* NOTE(review): if no bit is set from that index on, find_next_bit() presumably returns */
/* an out-of-range index, which bit_to_vq() warns about and clamps - confirm. */
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
331+
vq_to_bit(sve_vq_from_vl(vl)));
332+
return sve_vl_from_vq(bit_to_vq(bit));
333+
}
334+
274335
#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
275336
(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
276337

@@ -365,6 +426,76 @@ void sve_alloc(struct task_struct *task)
365426
BUG_ON(!task->thread.sve_state);
366427
}
367428

429+
/* Change the SVE vector length of task, immediately and/or for its next exec. */
/* */
/* task:  task whose vector length settings are updated. */
/* vl:    requested vector length; clamped to SVE_VL_ARCH_MAX and then mapped */
/*        through find_supported_vector_length(). */
/* flags: any combination of PR_SVE_VL_INHERIT and PR_SVE_SET_VL_ONEXEC. */
/* */
/* Returns -EINVAL if flags contains any other bit or if sve_vl_valid(vl) fails; */
/* otherwise returns 0. */
/* */
/* On success, thread.sve_vl_onexec is set to the chosen length when either flag */
/* is given and to 0 otherwise, and TIF_SVE_VL_INHERIT is set or cleared to match */
/* PR_SVE_VL_INHERIT. thread.sve_vl is only changed when PR_SVE_SET_VL_ONEXEC is */
/* not set and the chosen length differs from the current one. */
int sve_set_vector_length(struct task_struct *task,
430+
unsigned long vl, unsigned long flags)
431+
{
432+
/* Reject any flag bits other than the two this function understands. */
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
433+
PR_SVE_SET_VL_ONEXEC))
434+
return -EINVAL;
435+
436+
if (!sve_vl_valid(vl))
437+
return -EINVAL;
438+
439+
/*
440+
* Clamp to the maximum vector length that VL-agnostic SVE code can
441+
* work with. A flag may be assigned in the future to allow setting
442+
* of larger vector lengths without confusing older software.
443+
*/
444+
if (vl > SVE_VL_ARCH_MAX)
445+
vl = SVE_VL_ARCH_MAX;
446+
447+
/* From here on, vl is the length actually selected, not the one requested. */
vl = find_supported_vector_length(vl);
448+
449+
/* Either flag records the selected length for the next exec. */
if (flags & (PR_SVE_VL_INHERIT |
450+
PR_SVE_SET_VL_ONEXEC))
451+
task->thread.sve_vl_onexec = vl;
452+
else
453+
/* Reset VL to system default on next exec: */
454+
task->thread.sve_vl_onexec = 0;
455+
456+
/* Only actually set the VL if not deferred: */
457+
if (flags & PR_SVE_SET_VL_ONEXEC)
458+
goto out;
459+
460+
/* Nothing to convert or free if the length is unchanged. */
if (vl == task->thread.sve_vl)
461+
goto out;
462+
463+
/*
464+
* To ensure the FPSIMD bits of the SVE vector registers are preserved,
465+
* write any live register state back to task_struct, and convert to a
466+
* non-SVE thread.
467+
*/
468+
/* Saving live state is only done when operating on the calling task; */
/* softirqs stay disabled until the matching local_bh_enable() below. */
if (task == current) {
469+
local_bh_disable();
470+
471+
task_fpsimd_save();
472+
set_thread_flag(TIF_FOREIGN_FPSTATE);
473+
}
474+
475+
fpsimd_flush_task_state(task);
476+
/* Only a task that had TIF_SVE set is converted; the flag is cleared either way. */
if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
477+
sve_to_fpsimd(task);
478+
479+
if (task == current)
480+
local_bh_enable();
481+
482+
/*
483+
* Force reallocation of task SVE state to the correct size
484+
* on next use:
485+
*/
486+
sve_free(task);
487+
488+
task->thread.sve_vl = vl;
489+
490+
/* Reached on every successful path, including the deferred and unchanged cases. */
out:
491+
if (flags & PR_SVE_VL_INHERIT)
492+
set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
493+
else
494+
clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
495+
496+
return 0;
497+
}
498+
368499
/*
369500
* Called from the put_task_struct() path, which cannot get here
370501
* unless dead_task is really dead and not schedulable.
@@ -481,7 +612,7 @@ void fpsimd_thread_switch(struct task_struct *next)
481612

482613
void fpsimd_flush_thread(void)
483614
{
484-
int vl;
615+
int vl, supported_vl;
485616

486617
if (!system_supports_fpsimd())
487618
return;
@@ -509,6 +640,10 @@ void fpsimd_flush_thread(void)
509640
if (WARN_ON(!sve_vl_valid(vl)))
510641
vl = SVE_VL_MIN;
511642

643+
supported_vl = find_supported_vector_length(vl);
644+
if (WARN_ON(supported_vl != vl))
645+
vl = supported_vl;
646+
512647
current->thread.sve_vl = vl;
513648

514649
/*

include/uapi/linux/prctl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,9 @@ struct prctl_mm_map {
197197
# define PR_CAP_AMBIENT_LOWER 3
198198
# define PR_CAP_AMBIENT_CLEAR_ALL 4
199199

200+
/* arm64 Scalable Vector Extension controls */
201+
# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
202+
# define PR_SVE_VL_LEN_MASK 0xffff
203+
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
204+
200205
#endif /* _LINUX_PRCTL_H */

0 commit comments

Comments
 (0)