Commit 4b1967c

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Will Deacon:
 "The critical one here is a fix for fpsimd register corruption across
  signals which was introduced by the SVE support code (the register
  files overlap), but the others are worth having as well.

  Summary:

   - Fix FP register corruption when SVE is not available or not in use

   - Fix out-of-tree module build failure when CONFIG_ARM64_MODULE_PLTS=y

   - Missing 'const' generating errors with LTO builds

   - Remove unsupported events from Cortex-A73 PMU description

   - Removal of stale and incorrect comments"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: context: Fix comments and remove pointless smp_wmb()
  arm64: cpu_ops: Add missing 'const' qualifiers
  arm64: perf: remove unsupported events for Cortex-A73
  arm64: fpsimd: Fix failure to restore FPSIMD state after signals
  arm64: pgd: Mark pgd_cache as __ro_after_init
  arm64: ftrace: emit ftrace-mod.o contents through code
  arm64: module-plts: factor out PLT generation code for ftrace
  arm64: mm: cleanup stale AIVIVT references
2 parents a0651c7 + 3a33c76 commit 4b1967c

13 files changed, 92 insertions(+), 93 deletions(-)

arch/arm64/Makefile

Lines changed: 0 additions & 3 deletions
@@ -83,9 +83,6 @@ endif
 
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
-ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
-KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
-endif
 endif
 
 # Default value

arch/arm64/include/asm/cacheflush.h

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@
  *
  * See Documentation/cachetlb.txt for more information. Please note that
  * the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- * VIPT or ASID-tagged VIVT I-cache.
+ * VIPT I-cache.
  *
  * flush_cache_mm(mm)
  *

arch/arm64/include/asm/module.h

Lines changed: 45 additions & 1 deletion
@@ -32,7 +32,7 @@ struct mod_arch_specific {
         struct mod_plt_sec init;
 
         /* for CONFIG_DYNAMIC_FTRACE */
-        void *ftrace_trampoline;
+        struct plt_entry *ftrace_trampoline;
 };
 #endif
 
@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
 #define module_alloc_base ((u64)_etext - MODULES_VSIZE)
 #endif
 
+struct plt_entry {
+        /*
+         * A program that conforms to the AArch64 Procedure Call Standard
+         * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+         * IP1 (x17) may be inserted at any branch instruction that is
+         * exposed to a relocation that supports long branches. Since that
+         * is exactly what we are dealing with here, we are free to use x16
+         * as a scratch register in the PLT veneers.
+         */
+        __le32 mov0;    /* movn x16, #0x....              */
+        __le32 mov1;    /* movk x16, #0x...., lsl #16     */
+        __le32 mov2;    /* movk x16, #0x...., lsl #32     */
+        __le32 br;      /* br   x16                       */
+};
+
+static inline struct plt_entry get_plt_entry(u64 val)
+{
+        /*
+         * MOVK/MOVN/MOVZ opcode:
+         * +--------+------------+--------+-----------+-------------+---------+
+         * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+         * +--------+------------+--------+-----------+-------------+---------+
+         *
+         * Rd  := 0x10 (x16)
+         * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+         * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+         * sf  := 1 (64-bit variant)
+         */
+        return (struct plt_entry){
+                cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+                cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+                cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+                cpu_to_le32(0xd61f0200)
+        };
+}
+
+static inline bool plt_entries_equal(const struct plt_entry *a,
+                                     const struct plt_entry *b)
+{
+        return a->mov0 == b->mov0 &&
+               a->mov1 == b->mov1 &&
+               a->mov2 == b->mov2;
+}
+
 #endif /* __ASM_MODULE_H */

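For reference, the veneer encoding above can be checked outside the kernel. The sketch below is an illustration only, not kernel code: it re-declares the opcode constants locally, uses host-endian words, and assumes a kernel-style target address whose top 16 bits are all ones (the movn/movk sequence never patches bits 63:48). It builds the four instruction words the same way get_plt_entry() does and decodes them back to the original address.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Host-endian equivalents of the four words emitted by get_plt_entry(). */
struct plt_words {
        uint32_t mov0;  /* movn x16, #(~addr & 0xffff)      */
        uint32_t mov1;  /* movk x16, #(addr >> 16), lsl #16 */
        uint32_t mov2;  /* movk x16, #(addr >> 32), lsl #32 */
        uint32_t br;    /* br   x16                         */
};

static struct plt_words encode(uint64_t val)
{
        return (struct plt_words){
                (uint32_t)(0x92800010 | (((~val      ) & 0xffff) << 5)),
                (uint32_t)(0xf2a00010 | (((val >> 16) & 0xffff) << 5)),
                (uint32_t)(0xf2c00010 | (((val >> 32) & 0xffff) << 5)),
                0xd61f0200,
        };
}

static uint64_t decode(struct plt_words p)
{
        uint64_t lo  = (p.mov0 >> 5) & 0xffff;  /* imm16 of the movn */
        uint64_t mid = (p.mov1 >> 5) & 0xffff;  /* bits 31:16        */
        uint64_t hi  = (p.mov2 >> 5) & 0xffff;  /* bits 47:32        */
        uint64_t x16 = ~lo;                     /* movn: bits 63:16 become ones */

        x16 = (x16 & ~(0xffffull << 16)) | (mid << 16); /* movk, lsl #16 */
        x16 = (x16 & ~(0xffffull << 32)) | (hi  << 32); /* movk, lsl #32 */
        return x16;
}

int main(void)
{
        /* Hypothetical target in the kernel VA range (bits 63:48 all ones). */
        uint64_t addr = 0xffff000008123456ull;
        struct plt_words p = encode(addr);

        assert(p.br == 0xd61f0200);
        assert(decode(p) == addr);
        printf("%08x %08x %08x %08x -> %#llx\n", p.mov0, p.mov1, p.mov2, p.br,
               (unsigned long long)decode(p));
        return 0;
}
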
arch/arm64/kernel/Makefile

Lines changed: 0 additions & 3 deletions
@@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds
 ifeq ($(CONFIG_DEBUG_EFI),y)
 AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
 endif
-
-# will be included by each individual module but not by the core kernel itself
-extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o

arch/arm64/kernel/cpu_ops.c

Lines changed: 3 additions & 3 deletions
@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;
 
 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 
-static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
         &smp_spin_table_ops,
         &cpu_psci_ops,
         NULL,
 };
 
-static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
         &acpi_parking_protocol_ops,
 #endif
@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
 
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-        const struct cpu_operations **ops;
+        const struct cpu_operations *const *ops;
 
         ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
 

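The extra 'const' here is the difference between an array of pointers to const data (the array elements themselves stay writable) and an array of const pointers, which is genuinely read-only and can therefore live in a const section such as __initconst; that mismatch is what the merge summary's "missing 'const' generating errors with LTO builds" refers to. A minimal stand-alone illustration of the type distinction, using a made-up ops struct rather than the kernel's cpu_operations:

struct ops { int (*init)(void); };

static const struct ops psci = { 0 };

/* Array of mutable pointers to const data: the array object is writable,
 * so it cannot be placed in a read-only section. */
static const struct ops *writable_table[] = { &psci, 0 };

/* Array of const pointers to const data: fully read-only, so it is a valid
 * candidate for a const (e.g. __initconst) section. */
static const struct ops *const ro_table[] = { &psci, 0 };

/* Walking either table needs a pointer-to-const-pointer, mirroring the
 * cpu_get_ops() change above. */
static const struct ops *lookup(const struct ops *const *table)
{
        return table[0];
}

int main(void)
{
        writable_table[0] = 0;  /* legal: the pointers are mutable  */
        /* ro_table[0] = 0; */  /* would not compile: const pointer */
        return lookup(ro_table) != &psci;
}
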
arch/arm64/kernel/fpsimd.c

Lines changed: 3 additions & 3 deletions
@@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
         local_bh_disable();
 
-        if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
-                current->thread.fpsimd_state = *state;
+        current->thread.fpsimd_state = *state;
+        if (system_supports_sve() && test_thread_flag(TIF_SVE))
                 fpsimd_to_sve(current);
-        }
+
         task_fpsimd_load();
 
         if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {

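The one-line reordering matters because, as the merge text notes, the SVE and FPSIMD register files overlap: the old code only copied the caller-supplied state into the task struct on the TIF_SVE path, so for a task not using SVE the subsequent reload put stale registers back after sigreturn. A simplified stand-alone model of the fixed control flow (all names below are stand-ins, not the kernel's):

#include <stdbool.h>
#include <string.h>

struct fpsimd_regs { unsigned long v[64]; }; /* stand-in for the V registers   */
static struct fpsimd_regs task_state;        /* stand-in: per-task saved state */
static bool sve_in_use;                      /* stand-in for TIF_SVE           */

static void convert_to_sve(void) { /* fpsimd_to_sve() stand-in */ }
static void reload_registers_from(const struct fpsimd_regs *s) { (void)s; }

/* Fixed flow: the new state is always written back to the task struct first,
 * and only the optional SVE conversion is conditional. With the old ordering
 * the copy lived inside the if-block, so a !sve_in_use task reloaded whatever
 * stale contents task_state happened to hold. */
static void update_current_state(const struct fpsimd_regs *new_state)
{
        memcpy(&task_state, new_state, sizeof(task_state));
        if (sve_in_use)
                convert_to_sve();

        reload_registers_from(&task_state);
}

int main(void)
{
        struct fpsimd_regs from_signal_frame = { { 42 } };

        sve_in_use = false;
        update_current_state(&from_signal_frame);
        return task_state.v[0] != 42; /* 0: the new state was preserved */
}
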
arch/arm64/kernel/ftrace-mod.S

Lines changed: 0 additions & 18 deletions
This file was deleted.

arch/arm64/kernel/ftrace.c

Lines changed: 8 additions & 6 deletions
@@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
         if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-                unsigned long *trampoline;
+                struct plt_entry trampoline;
                 struct module *mod;
 
                 /*
@@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
                  * is added in the future, but for now, the pr_err() below
                  * deals with a theoretical issue only.
                  */
-                trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
-                if (trampoline[0] != addr) {
-                        if (trampoline[0] != 0) {
+                trampoline = get_plt_entry(addr);
+                if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+                                       &trampoline)) {
+                        if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+                                               &(struct plt_entry){})) {
                                 pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
                                 return -EINVAL;
                         }
 
                         /* point the trampoline to our ftrace entry point */
                         module_disable_ro(mod);
-                        trampoline[0] = addr;
+                        *mod->arch.ftrace_trampoline = trampoline;
                         module_enable_ro(mod, true);
 
                         /* update trampoline before patching in the branch */
                         smp_wmb();
                 }
-                addr = (unsigned long)&trampoline[1];
+                addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
 #else /* CONFIG_ARM64_MODULE_PLTS */
                 return -EINVAL;
 #endif /* CONFIG_ARM64_MODULE_PLTS */

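The logic above is an install-once pattern: build the PLT veneer we want, compare it against what the module's trampoline slot already holds, write it only if the slot is still empty, and refuse if a different veneer is already installed (one trampoline per module). A stand-alone sketch of just that decision logic, with the PLT entry reduced to its target address and the kernel helpers stubbed out:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Reduced stand-in for struct plt_entry: all zeroes means "not installed". */
struct plt_slot { uint64_t target; };

static int slots_equal(const struct plt_slot *a, const struct plt_slot *b)
{
        return memcmp(a, b, sizeof(*a)) == 0;
}

/*
 * Install-once policy, mirroring ftrace_make_call() above:
 *  - if the slot already points at 'addr', reuse it;
 *  - if the slot is empty, install the new veneer;
 *  - otherwise a different entry point already owns the slot -> error.
 */
static int use_trampoline(struct plt_slot *slot, uint64_t addr)
{
        struct plt_slot wanted = { addr };
        struct plt_slot empty = { 0 };

        if (!slots_equal(slot, &wanted)) {
                if (!slots_equal(slot, &empty))
                        return -EINVAL; /* second entry point in one module */
                *slot = wanted;         /* install, then branch via the slot */
        }
        return 0;
}

int main(void)
{
        struct plt_slot slot = { 0 };

        printf("%d %d %d\n",
               use_trampoline(&slot, 0x1000),  /* installs          -> 0       */
               use_trampoline(&slot, 0x1000),  /* reuses            -> 0       */
               use_trampoline(&slot, 0x2000)); /* conflicting entry -> -EINVAL */
        return 0;
}
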
arch/arm64/kernel/module-plts.c

Lines changed: 14 additions & 36 deletions
@@ -11,21 +11,6 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 
-struct plt_entry {
-        /*
-         * A program that conforms to the AArch64 Procedure Call Standard
-         * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
-         * IP1 (x17) may be inserted at any branch instruction that is
-         * exposed to a relocation that supports long branches. Since that
-         * is exactly what we are dealing with here, we are free to use x16
-         * as a scratch register in the PLT veneers.
-         */
-        __le32 mov0;    /* movn x16, #0x....              */
-        __le32 mov1;    /* movk x16, #0x...., lsl #16     */
-        __le32 mov2;    /* movk x16, #0x...., lsl #32     */
-        __le32 br;      /* br   x16                       */
-};
-
 static bool in_init(const struct module *mod, void *loc)
 {
         return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
         int i = pltsec->plt_num_entries;
         u64 val = sym->st_value + rela->r_addend;
 
-        /*
-         * MOVK/MOVN/MOVZ opcode:
-         * +--------+------------+--------+-----------+-------------+---------+
-         * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-         * +--------+------------+--------+-----------+-------------+---------+
-         *
-         * Rd  := 0x10 (x16)
-         * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-         * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-         * sf  := 1 (64-bit variant)
-         */
-        plt[i] = (struct plt_entry){
-                cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-                cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-                cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-                cpu_to_le32(0xd61f0200)
-        };
+        plt[i] = get_plt_entry(val);
 
         /*
          * Check if the entry we just created is a duplicate. Given that the
          * relocations are sorted, this will be the last entry we allocated.
          * (if one exists).
          */
-        if (i > 0 &&
-            plt[i].mov0 == plt[i - 1].mov0 &&
-            plt[i].mov1 == plt[i - 1].mov1 &&
-            plt[i].mov2 == plt[i - 1].mov2)
+        if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
                 return (u64)&plt[i - 1];
 
         pltsec->plt_num_entries++;
@@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
         unsigned long core_plts = 0;
         unsigned long init_plts = 0;
         Elf64_Sym *syms = NULL;
+        Elf_Shdr *tramp = NULL;
         int i;
 
         /*
@@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                         mod->arch.core.plt = sechdrs + i;
                 else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
                         mod->arch.init.plt = sechdrs + i;
+                else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+                         !strcmp(secstrings + sechdrs[i].sh_name,
+                                 ".text.ftrace_trampoline"))
+                        tramp = sechdrs + i;
                 else if (sechdrs[i].sh_type == SHT_SYMTAB)
                         syms = (Elf64_Sym *)sechdrs[i].sh_addr;
         }
@@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
         mod->arch.init.plt_num_entries = 0;
         mod->arch.init.plt_max_entries = init_plts;
 
+        if (tramp) {
+                tramp->sh_type = SHT_NOBITS;
+                tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+                tramp->sh_addralign = __alignof__(struct plt_entry);
+                tramp->sh_size = sizeof(struct plt_entry);
+        }
+
         return 0;
 }

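Together with the module.lds change below, this replaces the prebuilt ftrace-mod.o: the linker script gives every module a one-byte .text.ftrace_trampoline placeholder, and module_frob_arch_sections() converts it at load time into a zero-backed (SHT_NOBITS) allocation big enough for one PLT entry, which ftrace later fills in. A stand-alone sketch of that section fix-up using the standard <elf.h> types; the section name, flags and sizing are taken from the diff, everything else is illustrative:

#include <elf.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same layout as the kernel's struct plt_entry: four 32-bit instructions. */
struct plt_entry { uint32_t mov0, mov1, mov2, br; };

/*
 * Find ".text.ftrace_trampoline" among a module's section headers and turn
 * the placeholder into a NOBITS allocation of one PLT entry, as
 * module_frob_arch_sections() does above.
 */
static int reserve_ftrace_trampoline(Elf64_Shdr *sechdrs, unsigned int shnum,
                                     const char *secstrings)
{
        unsigned int i;

        for (i = 0; i < shnum; i++) {
                if (strcmp(secstrings + sechdrs[i].sh_name,
                           ".text.ftrace_trampoline"))
                        continue;

                sechdrs[i].sh_type = SHT_NOBITS; /* no file contents to copy */
                sechdrs[i].sh_flags = SHF_EXECINSTR | SHF_ALLOC;
                sechdrs[i].sh_addralign = __alignof__(struct plt_entry);
                sechdrs[i].sh_size = sizeof(struct plt_entry);
                return 0;
        }
        return -1;
}

int main(void)
{
        /* Minimal fake module image: one placeholder section, one string table. */
        const char secstrings[] = "\0.text.ftrace_trampoline";
        Elf64_Shdr sechdrs[1] = { { .sh_name = 1, .sh_type = SHT_PROGBITS,
                                    .sh_size = 1 } };

        if (reserve_ftrace_trampoline(sechdrs, 1, secstrings))
                return 1;
        printf("reserved %llu bytes, align %llu\n",
               (unsigned long long)sechdrs[0].sh_size,
               (unsigned long long)sechdrs[0].sh_addralign);
        return 0;
}
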
arch/arm64/kernel/module.lds

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 SECTIONS {
         .plt (NOLOAD) : { BYTE(0) }
         .init.plt (NOLOAD) : { BYTE(0) }
+        .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
 }

arch/arm64/kernel/perf_event.c

Lines changed: 0 additions & 6 deletions
@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
         [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
         [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-        [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-        [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-        [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-        [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]

arch/arm64/mm/context.c

Lines changed: 16 additions & 12 deletions
@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)
 
         set_reserved_asid_bits();
 
-        /*
-         * Ensure the generation bump is observed before we xchg the
-         * active_asids.
-         */
-        smp_wmb();
-
         for_each_possible_cpu(i) {
                 asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
                 /*
@@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu)
                 per_cpu(reserved_asids, i) = asid;
         }
 
-        /* Queue a TLB invalidate and flush the I-cache if necessary. */
+        /*
+         * Queue a TLB invalidation for each CPU to perform on next
+         * context-switch
+         */
         cpumask_setall(&tlb_flush_pending);
 }
 
@@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
         asid = atomic64_read(&mm->context.id);
 
         /*
-         * The memory ordering here is subtle. We rely on the control
-         * dependency between the generation read and the update of
-         * active_asids to ensure that we are synchronised with a
-         * parallel rollover (i.e. this pairs with the smp_wmb() in
-         * flush_context).
+         * The memory ordering here is subtle.
+         * If our ASID matches the current generation, then we update
+         * our active_asids entry with a relaxed xchg. Racing with a
+         * concurrent rollover means that either:
+         *
+         * - We get a zero back from the xchg and end up waiting on the
+         *   lock. Taking the lock synchronises with the rollover and so
+         *   we are forced to see the updated generation.
+         *
+         * - We get a valid ASID back from the xchg, which means the
+         *   relaxed xchg in flush_context will treat us as reserved
+         *   because atomic RmWs are totally ordered for a given location.
          */
         if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
             && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))

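The rewritten comment explains why the fast path needs no explicit barrier: a racing rollover either steals our active_asids slot (we read back zero and fall onto the lock, which synchronises with the rollover) or observes our relaxed xchg through its own xchg, because atomic read-modify-writes on one location are totally ordered; that is why the smp_wmb() above could go. A simplified single-CPU, user-space model of the fast-path/slow-path split is sketched below; the generation test, the relaxed exchange and the zero-means-rollover convention follow the kernel code, while the lock, the ASID allocation and everything else are stand-ins:

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define ASID_BITS 16

static _Atomic uint64_t asid_generation = (1ull << ASID_BITS);
static _Atomic uint64_t active_asid;            /* one CPU slot for the model */
static pthread_mutex_t cpu_asid_lock = PTHREAD_MUTEX_INITIALIZER;

/* Slow-path stand-in: would allocate an ASID in the current generation. */
static uint64_t new_context(uint64_t asid)
{
        return (atomic_load(&asid_generation) & ~((1ull << ASID_BITS) - 1)) |
               (asid & ((1ull << ASID_BITS) - 1));
}

static uint64_t check_and_switch_context(uint64_t asid)
{
        /*
         * Fast path: if the ASID belongs to the current generation, publish
         * it with a relaxed exchange. Getting 0 back means a concurrent
         * rollover already claimed the slot, so fall through to the lock.
         */
        if (!((asid ^ atomic_load(&asid_generation)) >> ASID_BITS) &&
            atomic_exchange_explicit(&active_asid, asid, memory_order_relaxed))
                return asid;

        pthread_mutex_lock(&cpu_asid_lock);  /* slow path: sync with rollover */
        asid = new_context(asid);
        atomic_store(&active_asid, asid);
        pthread_mutex_unlock(&cpu_asid_lock);
        return asid;
}

int main(void)
{
        uint64_t asid = (1ull << ASID_BITS) | 5;

        atomic_store(&active_asid, asid);
        printf("%#llx\n", (unsigned long long)check_and_switch_context(asid));
        return 0;
}
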
arch/arm64/mm/pgd.c

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 
-static struct kmem_cache *pgd_cache;
+static struct kmem_cache *pgd_cache __ro_after_init;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
