|
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
|
18 | 18 | */
|
19 | 19 |
|
| 20 | +#include <linux/bitmap.h> |
20 | 21 | #include <linux/bottom_half.h>
|
21 | 22 | #include <linux/bug.h>
|
| 23 | +#include <linux/cache.h> |
22 | 24 | #include <linux/compat.h>
|
23 | 25 | #include <linux/cpu.h>
|
24 | 26 | #include <linux/cpu_pm.h>
|
|
28 | 30 | #include <linux/init.h>
|
29 | 31 | #include <linux/percpu.h>
|
30 | 32 | #include <linux/preempt.h>
|
| 33 | +#include <linux/prctl.h> |
31 | 34 | #include <linux/ptrace.h>
|
32 | 35 | #include <linux/sched/signal.h>
|
33 | 36 | #include <linux/sched/task_stack.h>
|
@@ -114,6 +117,20 @@ static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
|
114 | 117 | /* Default VL for tasks that don't set it explicitly: */
|
115 | 118 | static int sve_default_vl = SVE_VL_MIN;
|
116 | 119 |
|
| 120 | +#ifdef CONFIG_ARM64_SVE |
| 121 | + |
| 122 | +/* Maximum supported vector length across all CPUs (initially poisoned) */ |
| 123 | +int __ro_after_init sve_max_vl = -1; |
| 124 | +/* Set of available vector lengths, as vq_to_bit(vq): */ |
| 125 | +static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); |
| 126 | + |
| 127 | +#else /* ! CONFIG_ARM64_SVE */ |
| 128 | + |
| 129 | +/* Dummy declaration for code that will be optimised out: */ |
| 130 | +extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); |
| 131 | + |
| 132 | +#endif /* ! CONFIG_ARM64_SVE */ |
| 133 | + |
117 | 134 | /*
|
118 | 135 | * Call __sve_free() directly only if you know task can't be scheduled
|
119 | 136 | * or preempted.
|
@@ -271,6 +288,50 @@ static void task_fpsimd_save(void)
|
271 | 288 | }
|
272 | 289 | }
|
273 | 290 |
|
| 291 | +/* |
| 292 | + * Helpers to translate bit indices in sve_vq_map to VQ values (and |
| 293 | + * vice versa). This allows find_next_bit() to be used to find the |
| 294 | + * _maximum_ VQ not exceeding a certain value. |
| 295 | + */ |
| 296 | + |
| 297 | +static unsigned int vq_to_bit(unsigned int vq) |
| 298 | +{ |
| 299 | + return SVE_VQ_MAX - vq; |
| 300 | +} |
| 301 | + |
| 302 | +static unsigned int bit_to_vq(unsigned int bit) |
| 303 | +{ |
| 304 | + if (WARN_ON(bit >= SVE_VQ_MAX)) |
| 305 | + bit = SVE_VQ_MAX - 1; |
| 306 | + |
| 307 | + return SVE_VQ_MAX - bit; |
| 308 | +} |
| 309 | + |
| 310 | +/* |
| 311 | + * All vector length selection from userspace comes through here. |
| 312 | + * We're on a slow path, so some sanity-checks are included. |
| 313 | + * If things go wrong there's a bug somewhere, but try to fall back to a |
| 314 | + * safe choice. |
| 315 | + */ |
| 316 | +static unsigned int find_supported_vector_length(unsigned int vl) |
| 317 | +{ |
| 318 | + int bit; |
| 319 | + int max_vl = sve_max_vl; |
| 320 | + |
| 321 | + if (WARN_ON(!sve_vl_valid(vl))) |
| 322 | + vl = SVE_VL_MIN; |
| 323 | + |
| 324 | + if (WARN_ON(!sve_vl_valid(max_vl))) |
| 325 | + max_vl = SVE_VL_MIN; |
| 326 | + |
| 327 | + if (vl > max_vl) |
| 328 | + vl = max_vl; |
| 329 | + |
| 330 | + bit = find_next_bit(sve_vq_map, SVE_VQ_MAX, |
| 331 | + vq_to_bit(sve_vq_from_vl(vl))); |
| 332 | + return sve_vl_from_vq(bit_to_vq(bit)); |
| 333 | +} |
| 334 | + |
/*
 * Byte pointer into sve_state at the SVE_SIG_ZREG_OFFSET(vq, n) position,
 * rebased so that SVE_SIG_REGS_OFFSET corresponds to the start of
 * sve_state.
 */
#define ZREG(sve_state, vq, n)						\
	((char *)(sve_state) +						\
	 (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
|
276 | 337 |
|
@@ -365,6 +426,76 @@ void sve_alloc(struct task_struct *task)
|
365 | 426 | BUG_ON(!task->thread.sve_state);
|
366 | 427 | }
|
367 | 428 |
|
| 429 | +int sve_set_vector_length(struct task_struct *task, |
| 430 | + unsigned long vl, unsigned long flags) |
| 431 | +{ |
| 432 | + if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT | |
| 433 | + PR_SVE_SET_VL_ONEXEC)) |
| 434 | + return -EINVAL; |
| 435 | + |
| 436 | + if (!sve_vl_valid(vl)) |
| 437 | + return -EINVAL; |
| 438 | + |
| 439 | + /* |
| 440 | + * Clamp to the maximum vector length that VL-agnostic SVE code can |
| 441 | + * work with. A flag may be assigned in the future to allow setting |
| 442 | + * of larger vector lengths without confusing older software. |
| 443 | + */ |
| 444 | + if (vl > SVE_VL_ARCH_MAX) |
| 445 | + vl = SVE_VL_ARCH_MAX; |
| 446 | + |
| 447 | + vl = find_supported_vector_length(vl); |
| 448 | + |
| 449 | + if (flags & (PR_SVE_VL_INHERIT | |
| 450 | + PR_SVE_SET_VL_ONEXEC)) |
| 451 | + task->thread.sve_vl_onexec = vl; |
| 452 | + else |
| 453 | + /* Reset VL to system default on next exec: */ |
| 454 | + task->thread.sve_vl_onexec = 0; |
| 455 | + |
| 456 | + /* Only actually set the VL if not deferred: */ |
| 457 | + if (flags & PR_SVE_SET_VL_ONEXEC) |
| 458 | + goto out; |
| 459 | + |
| 460 | + if (vl == task->thread.sve_vl) |
| 461 | + goto out; |
| 462 | + |
| 463 | + /* |
| 464 | + * To ensure the FPSIMD bits of the SVE vector registers are preserved, |
| 465 | + * write any live register state back to task_struct, and convert to a |
| 466 | + * non-SVE thread. |
| 467 | + */ |
| 468 | + if (task == current) { |
| 469 | + local_bh_disable(); |
| 470 | + |
| 471 | + task_fpsimd_save(); |
| 472 | + set_thread_flag(TIF_FOREIGN_FPSTATE); |
| 473 | + } |
| 474 | + |
| 475 | + fpsimd_flush_task_state(task); |
| 476 | + if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) |
| 477 | + sve_to_fpsimd(task); |
| 478 | + |
| 479 | + if (task == current) |
| 480 | + local_bh_enable(); |
| 481 | + |
| 482 | + /* |
| 483 | + * Force reallocation of task SVE state to the correct size |
| 484 | + * on next use: |
| 485 | + */ |
| 486 | + sve_free(task); |
| 487 | + |
| 488 | + task->thread.sve_vl = vl; |
| 489 | + |
| 490 | +out: |
| 491 | + if (flags & PR_SVE_VL_INHERIT) |
| 492 | + set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT); |
| 493 | + else |
| 494 | + clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT); |
| 495 | + |
| 496 | + return 0; |
| 497 | +} |
| 498 | + |
368 | 499 | /*
|
369 | 500 | * Called from the put_task_struct() path, which cannot get here
|
370 | 501 | * unless dead_task is really dead and not schedulable.
|
@@ -481,7 +612,7 @@ void fpsimd_thread_switch(struct task_struct *next)
|
481 | 612 |
|
482 | 613 | void fpsimd_flush_thread(void)
|
483 | 614 | {
|
484 |
| - int vl; |
| 615 | + int vl, supported_vl; |
485 | 616 |
|
486 | 617 | if (!system_supports_fpsimd())
|
487 | 618 | return;
|
@@ -509,6 +640,10 @@ void fpsimd_flush_thread(void)
|
509 | 640 | if (WARN_ON(!sve_vl_valid(vl)))
|
510 | 641 | vl = SVE_VL_MIN;
|
511 | 642 |
|
| 643 | + supported_vl = find_supported_vector_length(vl); |
| 644 | + if (WARN_ON(supported_vl != vl)) |
| 645 | + vl = supported_vl; |
| 646 | + |
512 | 647 | current->thread.sve_vl = vl;
|
513 | 648 |
|
514 | 649 | /*
|
|
0 commit comments