Commit 07f8ba7

RISC-V: User-Visible Changes
This merge contains the user-visible, ABI-breaking changes that we want to make sure we have in Linux before our first release. Highlights include:

* VDSO entries for clock_get/gettimeofday/getcpu have been added. These are simple syscalls now, but we want to let glibc use them from the start so we can make them faster later.
* A VDSO entry for instruction cache flushing has been added so userspace can flush the instruction cache.
* The VDSO symbol versions for __vdso_cmpxchg{32,64} have been removed, as those VDSO entries don't actually exist.

Conflicts:
	arch/riscv/include/asm/tlbflush.h
2 parents f8182f6 + 0e710ac commit 07f8ba7

19 files changed: 392 additions, 34 deletions

arch/riscv/include/asm/cacheflush.h

Lines changed: 26 additions & 4 deletions
@@ -18,22 +18,44 @@
 
 #undef flush_icache_range
 #undef flush_icache_user_range
+#undef flush_dcache_page
 
 static inline void local_flush_icache_all(void)
 {
         asm volatile ("fence.i" ::: "memory");
 }
 
+#define PG_dcache_clean PG_arch_1
+
+static inline void flush_dcache_page(struct page *page)
+{
+        if (test_bit(PG_dcache_clean, &page->flags))
+                clear_bit(PG_dcache_clean, &page->flags);
+}
+
+/*
+ * RISC-V doesn't have an instruction to flush parts of the instruction cache,
+ * so instead we just flush the whole thing.
+ */
+#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
+
 #ifndef CONFIG_SMP
 
-#define flush_icache_range(start, end) local_flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all()
+#define flush_icache_all() local_flush_icache_all()
+#define flush_icache_mm(mm, local) flush_icache_all()
 
 #else /* CONFIG_SMP */
 
-#define flush_icache_range(start, end) sbi_remote_fence_i(0)
-#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0)
+#define flush_icache_all() sbi_remote_fence_i(0)
+void flush_icache_mm(struct mm_struct *mm, bool local);
 
 #endif /* CONFIG_SMP */
 
+/*
+ * Bits in sys_riscv_flush_icache()'s flags argument.
+ */
+#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
+#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL)
+
 #endif /* _ASM_RISCV_CACHEFLUSH_H */

arch/riscv/include/asm/mmu.h

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,10 @@
 
 typedef struct {
         void *vdso;
+#ifdef CONFIG_SMP
+        /* A local icache flush is needed before user execution can resume. */
+        cpumask_t icache_stale_mask;
+#endif
 } mm_context_t;
 
 #endif /* __ASSEMBLY__ */

arch/riscv/include/asm/mmu_context.h

Lines changed: 44 additions & 0 deletions
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -19,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
 
 static inline void enter_lazy_tlb(struct mm_struct *mm,
         struct task_struct *task)
@@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd)
         csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
 }
 
+/*
+ * When necessary, performs a deferred icache flush for the given MM context,
+ * on the local CPU. RISC-V has no direct mechanism for instruction cache
+ * shoot downs, so instead we send an IPI that informs the remote harts they
+ * need to flush their local instruction caches. To avoid pathologically slow
+ * behavior in a common case (a bunch of single-hart processes on a many-hart
+ * machine, ie 'make -j') we avoid the IPIs for harts that are not currently
+ * executing a MM context and instead schedule a deferred local instruction
+ * cache flush to be performed before execution resumes on each hart. This
+ * actually performs that local instruction cache flush, which implicitly only
+ * refers to the current hart.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+        unsigned int cpu = smp_processor_id();
+        cpumask_t *mask = &mm->context.icache_stale_mask;
+
+        if (cpumask_test_cpu(cpu, mask)) {
+                cpumask_clear_cpu(cpu, mask);
+                /*
+                 * Ensure the remote hart's writes are visible to this hart.
+                 * This pairs with a barrier in flush_icache_mm.
+                 */
+                smp_mb();
+                local_flush_icache_all();
+        }
+#endif
+}
+
 static inline void switch_mm(struct mm_struct *prev,
         struct mm_struct *next, struct task_struct *task)
 {
         if (likely(prev != next)) {
+                /*
+                 * Mark the current MM context as inactive, and the next as
+                 * active. This is at least used by the icache flushing
+                 * routines in order to determine who should be flushed.
+                 */
+                unsigned int cpu = smp_processor_id();
+
+                cpumask_clear_cpu(cpu, mm_cpumask(prev));
+                cpumask_set_cpu(cpu, mm_cpumask(next));
+
                 set_pgdir(next->pgd);
                 local_flush_tlb_all();
+
+                flush_icache_deferred(next);
         }
 }
 

arch/riscv/include/asm/pgtable.h

Lines changed: 32 additions & 26 deletions
@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
 #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr))
 #define pte_unmap(pte) ((void)(pte))
 
-/*
- * Certain architectures need to do special things when PTEs within
- * a page table are directly modified. Thus, the following hook is
- * made available.
- */
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
-        *ptep = pteval;
-}
-
-static inline void set_pte_at(struct mm_struct *mm,
-        unsigned long addr, pte_t *ptep, pte_t pteval)
-{
-        set_pte(ptep, pteval);
-}
-
-static inline void pte_clear(struct mm_struct *mm,
-        unsigned long addr, pte_t *ptep)
-{
-        set_pte_at(mm, addr, ptep, __pte(0));
-}
-
 static inline int pte_present(pte_t pte)
 {
         return (pte_val(pte) & _PAGE_PRESENT);
@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
         return (pte_val(pte) == 0);
 }
 
-/* static inline int pte_read(pte_t pte) */
-
 static inline int pte_write(pte_t pte)
 {
         return pte_val(pte) & _PAGE_WRITE;
 }
 
+static inline int pte_exec(pte_t pte)
+{
+        return pte_val(pte) & _PAGE_EXEC;
+}
+
 static inline int pte_huge(pte_t pte)
 {
         return pte_present(pte)
                 && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
 }
 
-/* static inline int pte_exec(pte_t pte) */
-
 static inline int pte_dirty(pte_t pte)
 {
         return pte_val(pte) & _PAGE_DIRTY;
@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
         return pte_val(pte_a) == pte_val(pte_b);
 }
 
+/*
+ * Certain architectures need to do special things when PTEs within
+ * a page table are directly modified. Thus, the following hook is
+ * made available.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+        *ptep = pteval;
+}
+
+void flush_icache_pte(pte_t pte);
+
+static inline void set_pte_at(struct mm_struct *mm,
+        unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+        if (pte_present(pteval) && pte_exec(pteval))
+                flush_icache_pte(pteval);
+
+        set_pte(ptep, pteval);
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+        unsigned long addr, pte_t *ptep)
+{
+        set_pte_at(mm, addr, ptep, __pte(0));
+}
+
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 static inline int ptep_set_access_flags(struct vm_area_struct *vma,
         unsigned long address, pte_t *ptep,
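
set_pte_at() now calls flush_icache_pte() for present, executable PTEs, but the helper is only declared in this header; its out-of-line definition is not part of the hunks shown here. A plausible sketch of it, assuming it builds on the PG_dcache_clean bit introduced in cacheflush.h above (an assumption, not text from this merge), is:

/*
 * Sketch only: the real definition lives out of line and is not shown in
 * this diff. The idea is to flush the icache at most once per page of new
 * code, using PG_dcache_clean as the "already synced with the icache" mark.
 */
void flush_icache_pte(pte_t pte)
{
        struct page *page = pte_page(pte);

        if (!test_and_set_bit(PG_dcache_clean, &page->flags))
                flush_icache_all();
}

Together with flush_dcache_page() clearing the bit whenever the kernel may have modified the page's contents, this would keep the cost of the full-icache flush to once per page of freshly written code.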

arch/riscv/include/asm/tlbflush.h

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@
 
 #ifdef CONFIG_MMU
 
+#include <linux/mm_types.h>
+
 /*
  * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
  * cache as well, so a 'fence.i' is not necessary.

arch/riscv/include/asm/vdso-syscalls.h (new file)

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
+#define _ASM_RISCV_VDSO_SYSCALLS_H
+
+#ifdef CONFIG_SMP
+
+/* These syscalls are only used by the vDSO and are not in the uapi. */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
+
+#endif /* _ASM_RISCV_VDSO_H */
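
The vDSO's flush_icache entry (added to the vDSO Makefile below) is what ends up reaching this syscall number, at least in the SMP configuration. The real entry point is a small assembly stub; the C-with-inline-assembly sketch below is only meant to show the calling convention being assumed (arguments in a0-a2, syscall number in a7, ecall to trap). The function name is hypothetical and not part of this commit:

#include <stdint.h>
#include <asm/unistd.h>         /* for __NR_arch_specific_syscall */

/* Not in the uapi; assumed to match asm/vdso-syscalls.h above. */
#ifndef __NR_riscv_flush_icache
#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
#endif

/* Hypothetical sketch: forward an icache-flush request to the kernel. */
static long riscv_flush_icache_trap(uintptr_t start, uintptr_t end,
                                    uintptr_t flags)
{
        register uintptr_t a0 asm("a0") = start;
        register uintptr_t a1 asm("a1") = end;
        register uintptr_t a2 asm("a2") = flags;
        register uintptr_t a7 asm("a7") = __NR_riscv_flush_icache;

        asm volatile ("ecall"
                      : "+r" (a0)
                      : "r" (a1), "r" (a2), "r" (a7)
                      : "memory");
        return (long)a0;
}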

arch/riscv/include/asm/vdso.h

Lines changed: 4 additions & 0 deletions
@@ -38,4 +38,8 @@ struct vdso_data {
         (void __user *)((unsigned long)(base) + __vdso_##name); \
 })
 
+#ifdef CONFIG_SMP
+asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+#endif
+
 #endif /* _ASM_RISCV_VDSO_H */

arch/riscv/kernel/smp.c

Lines changed: 48 additions & 0 deletions
@@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu)
 {
         send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }
+
+/*
+ * Performs an icache flush for the given MM context. RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+        unsigned int cpu;
+        cpumask_t others, *mask;
+
+        preempt_disable();
+
+        /* Mark every hart's icache as needing a flush for this MM. */
+        mask = &mm->context.icache_stale_mask;
+        cpumask_setall(mask);
+        /* Flush this hart's I$ now, and mark it as flushed. */
+        cpu = smp_processor_id();
+        cpumask_clear_cpu(cpu, mask);
+        local_flush_icache_all();
+
+        /*
+         * Flush the I$ of other harts concurrently executing, and mark them as
+         * flushed.
+         */
+        cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+        local |= cpumask_empty(&others);
+        if (mm != current->active_mm || !local)
+                sbi_remote_fence_i(others.bits);
+        else {
+                /*
+                 * It's assumed that at least one strongly ordered operation is
+                 * performed on this hart between setting a hart's cpumask bit
+                 * and scheduling this MM context on that hart. Sending an SBI
+                 * remote message will do this, but in the case where no
+                 * messages are sent we still need to order this hart's writes
+                 * with flush_icache_deferred().
+                 */
+                smp_mb();
+        }
+
+        preempt_enable();
+}

arch/riscv/kernel/sys_riscv.c

Lines changed: 32 additions & 1 deletion
@@ -14,8 +14,8 @@
  */
 
 #include <linux/syscalls.h>
-#include <asm/cmpxchg.h>
 #include <asm/unistd.h>
+#include <asm/cacheflush.h>
 
 static long riscv_sys_mmap(unsigned long addr, unsigned long len,
         unsigned long prot, unsigned long flags,
@@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
         return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
 }
 #endif /* !CONFIG_64BIT */
+
+#ifdef CONFIG_SMP
+/*
+ * Allows the instruction cache to be flushed from userspace. Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart. There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * sys_riscv_flush_icache() is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller. We don't currently do anything with the address range, that's just
+ * in there for forwards compatibility.
+ */
+SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
+        uintptr_t, flags)
+{
+        struct mm_struct *mm = current->mm;
+        bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
+
+        /* Check the reserved flags. */
+        if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+                return -EINVAL;
+
+        flush_icache_mm(mm, local);
+
+        return 0;
+}
+#endif
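
To illustrate the intended use, here is a hedged userspace sketch: a program writes freshly generated machine code into an executable buffer and asks the kernel to flush the instruction cache before jumping to it. It goes through syscall(2) with the number from vdso-syscalls.h above purely for illustration; since that number is not in the uapi, real programs are expected to reach this via the vDSO or glibc rather than hard-coding it. The wrapper name and the placeholder instruction bytes are assumptions, not part of this commit:

#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <asm/unistd.h>

/* Not part of the uapi; assumed to match asm/vdso-syscalls.h above. */
#ifndef __NR_riscv_flush_icache
#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
#endif
/* Pass this flag to restrict the flush to the calling thread's hart. */
#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL

static long flush_icache_region(void *start, void *end, unsigned long flags)
{
        return syscall(__NR_riscv_flush_icache, (uintptr_t)start,
                       (uintptr_t)end, flags);
}

int main(void)
{
        /* Placeholder bytes standing in for JIT output (c.ebreak; c.nop). */
        static const uint16_t insns[] = { 0x9002, 0x0001 };

        void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED)
                return 1;

        memcpy(buf, insns, sizeof(insns));

        /* flags == 0: flush for all threads of this mm before executing buf. */
        return flush_icache_region(buf, (char *)buf + sizeof(insns), 0) ? 1 : 0;
}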

arch/riscv/kernel/syscall_table.c

Lines changed: 2 additions & 0 deletions
@@ -15,11 +15,13 @@
 #include <linux/linkage.h>
 #include <linux/syscalls.h>
 #include <asm-generic/syscalls.h>
+#include <asm/vdso.h>
 
 #undef __SYSCALL
 #define __SYSCALL(nr, call) [nr] = (call),
 
 void *sys_call_table[__NR_syscalls] = {
         [0 ... __NR_syscalls - 1] = sys_ni_syscall,
 #include <asm/unistd.h>
+#include <asm/vdso-syscalls.h>
 };

arch/riscv/kernel/vdso/Makefile

Lines changed: 6 additions & 1 deletion
@@ -1,7 +1,12 @@
 # Copied from arch/tile/kernel/vdso/Makefile
 
 # Symbols present in the vdso
-vdso-syms = rt_sigreturn
+vdso-syms  = rt_sigreturn
+vdso-syms += gettimeofday
+vdso-syms += clock_gettime
+vdso-syms += clock_getres
+vdso-syms += getcpu
+vdso-syms += flush_icache
 
 # Files to link into the vdso
 obj-vdso = $(patsubst %, %.o, $(vdso-syms))
