Commit 08f051e

aswaterman authored and palmer-dabbelt committed
RISC-V: Flush I$ when making a dirty page executable
The RISC-V ISA allows for instruction caches that are not coherent WRT stores, even on a single hart. As a result, we need to explicitly flush the instruction cache whenever marking a dirty page as executable in order to preserve the correct system behavior.

Local instruction caches aren't that scary (our implementations actually flush the cache, but RISC-V is defined to allow higher-performance implementations to exist), but RISC-V defines no way to perform an instruction cache shootdown. When explicitly asked to do so we can shoot down remote instruction caches via an IPI, but this is a bit on the slow side.

Instead of requiring an IPI to all harts whenever marking a page as executable, we simply flush the currently running harts. In order to maintain correct behavior, we additionally mark every other hart as needing a deferred instruction cache flush, which will be taken before anything runs on it.

Signed-off-by: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
1 parent 28dfbe6 commit 08f051e
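
To make the hazard described above concrete, here is a minimal userspace sketch (illustrative only, not part of this commit) of the pattern the kernel has to support: instructions are written to a page as ordinary data and then executed. On a hart whose instruction cache is not coherent with stores, the writer must flush the icache before jumping into the buffer. GCC's __builtin___clear_cache is the portable way to ask for that, and it is satisfied by the same fence.i that local_flush_icache_all() below issues. The RV64I encodings are spelled out by hand, and the example assumes the kernel permits an RWX mapping:

/* jit.c: write "li a0, 42; ret" into a buffer, flush, then execute it. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	uint32_t insns[] = {
		0x02a00513,	/* li a0, 42  (addi a0, zero, 42) */
		0x00008067,	/* ret        (jalr zero, 0(ra))  */
	};
	void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	memcpy(buf, insns, sizeof(insns));

	/* Without this, the hart may fetch stale bytes from its I$. */
	__builtin___clear_cache((char *)buf, (char *)buf + sizeof(insns));

	int (*fn)(void) = (int (*)(void))buf;
	printf("%d\n", fn());	/* prints 42 */
	return 0;
}

Everything this patch adds exists to make the flush in the middle of that sequence both correct and cheap on a multi-hart machine.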

8 files changed, 174 insertions(+), 30 deletions(-)

arch/riscv/include/asm/cacheflush.h

Lines changed: 20 additions & 4 deletions
@@ -18,21 +18,37 @@
 
 #undef flush_icache_range
 #undef flush_icache_user_range
+#undef flush_dcache_page
 
 static inline void local_flush_icache_all(void)
 {
 	asm volatile ("fence.i" ::: "memory");
 }
 
+#define PG_dcache_clean PG_arch_1
+
+static inline void flush_dcache_page(struct page *page)
+{
+	if (test_bit(PG_dcache_clean, &page->flags))
+		clear_bit(PG_dcache_clean, &page->flags);
+}
+
+/*
+ * RISC-V doesn't have an instruction to flush parts of the instruction cache,
+ * so instead we just flush the whole thing.
+ */
+#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
+
 #ifndef CONFIG_SMP
 
-#define flush_icache_range(start, end) local_flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all()
+#define flush_icache_all() local_flush_icache_all()
+#define flush_icache_mm(mm, local) flush_icache_all()
 
 #else /* CONFIG_SMP */
 
-#define flush_icache_range(start, end) sbi_remote_fence_i(0)
-#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0)
+#define flush_icache_all() sbi_remote_fence_i(0)
+void flush_icache_mm(struct mm_struct *mm, bool local);
 
 #endif /* CONFIG_SMP */

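On SMP kernels, note that flush_icache_all() now goes through the SBI: sbi_remote_fence_i(0) asks the machine-mode firmware to execute fence.i on the harts selected by the mask, and a null hart mask conventionally selects every hart. The uniprocessor build collapses the same API to a single local fence.i.
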
arch/riscv/include/asm/mmu.h

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,10 @@
 
 typedef struct {
 	void *vdso;
+#ifdef CONFIG_SMP
+	/* A local icache flush is needed before user execution can resume. */
+	cpumask_t icache_stale_mask;
+#endif
 } mm_context_t;
 
 #endif /* __ASSEMBLY__ */

arch/riscv/include/asm/mmu_context.h

Lines changed: 44 additions & 0 deletions
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -19,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
 
 static inline void enter_lazy_tlb(struct mm_struct *mm,
 	struct task_struct *task)
@@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd)
 	csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
 }
 
+/*
+ * When necessary, performs a deferred icache flush for the given MM context,
+ * on the local CPU.  RISC-V has no direct mechanism for instruction cache
+ * shoot downs, so instead we send an IPI that informs the remote harts they
+ * need to flush their local instruction caches.  To avoid pathologically slow
+ * behavior in a common case (a bunch of single-hart processes on a many-hart
+ * machine, ie 'make -j') we avoid the IPIs for harts that are not currently
+ * executing a MM context and instead schedule a deferred local instruction
+ * cache flush to be performed before execution resumes on each hart.  This
+ * actually performs that local instruction cache flush, which implicitly only
+ * refers to the current hart.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+	unsigned int cpu = smp_processor_id();
+	cpumask_t *mask = &mm->context.icache_stale_mask;
+
+	if (cpumask_test_cpu(cpu, mask)) {
+		cpumask_clear_cpu(cpu, mask);
+		/*
+		 * Ensure the remote hart's writes are visible to this hart.
+		 * This pairs with a barrier in flush_icache_mm.
+		 */
+		smp_mb();
+		local_flush_icache_all();
+	}
+#endif
+}
+
 static inline void switch_mm(struct mm_struct *prev,
 	struct mm_struct *next, struct task_struct *task)
 {
 	if (likely(prev != next)) {
+		/*
+		 * Mark the current MM context as inactive, and the next as
+		 * active.  This is at least used by the icache flushing
+		 * routines in order to determine who should be flushed.
+		 */
+		unsigned int cpu = smp_processor_id();
+
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+
 		set_pgdir(next->pgd);
 		local_flush_tlb_all();
+
+		flush_icache_deferred(next);
 	}
 }

arch/riscv/include/asm/pgtable.h

Lines changed: 32 additions & 26 deletions
@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
 #define pte_offset_map(dir, addr)	pte_offset_kernel((dir), (addr))
 #define pte_unmap(pte)			((void)(pte))
 
-/*
- * Certain architectures need to do special things when PTEs within
- * a page table are directly modified.  Thus, the following hook is
- * made available.
- */
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
-	*ptep = pteval;
-}
-
-static inline void set_pte_at(struct mm_struct *mm,
-	unsigned long addr, pte_t *ptep, pte_t pteval)
-{
-	set_pte(ptep, pteval);
-}
-
-static inline void pte_clear(struct mm_struct *mm,
-	unsigned long addr, pte_t *ptep)
-{
-	set_pte_at(mm, addr, ptep, __pte(0));
-}
-
 static inline int pte_present(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_PRESENT);
@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
 	return (pte_val(pte) == 0);
 }
 
-/* static inline int pte_read(pte_t pte) */
-
 static inline int pte_write(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_WRITE;
 }
 
+static inline int pte_exec(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_EXEC;
+}
+
 static inline int pte_huge(pte_t pte)
 {
 	return pte_present(pte)
 		&& (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
 }
 
-/* static inline int pte_exec(pte_t pte) */
-
 static inline int pte_dirty(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_DIRTY;
@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
 	return pte_val(pte_a) == pte_val(pte_b);
 }
 
+/*
+ * Certain architectures need to do special things when PTEs within
+ * a page table are directly modified.  Thus, the following hook is
+ * made available.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	*ptep = pteval;
+}
+
+void flush_icache_pte(pte_t pte);
+
+static inline void set_pte_at(struct mm_struct *mm,
+	unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+	if (pte_present(pteval) && pte_exec(pteval))
+		flush_icache_pte(pteval);
+
+	set_pte(ptep, pteval);
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+	unsigned long addr, pte_t *ptep)
+{
+	set_pte_at(mm, addr, ptep, __pte(0));
+}
+
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 	unsigned long address, pte_t *ptep,

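Note the ordering inside the new set_pte_at(): flush_icache_pte() runs before the PTE is installed, so on the hart performing the mapping there is no window in which the executable mapping is visible while the icache still holds stale data; other harts are covered by the deferred-flush machinery this commit adds elsewhere. pte_exec() is introduced so the hook pays the flush cost only for executable mappings.
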
arch/riscv/include/asm/tlbflush.h

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@
 
 #ifdef CONFIG_MMU
 
+#include <linux/mm_types.h>
+
 /* Flush entire local TLB */
 static inline void local_flush_tlb_all(void)
 {

arch/riscv/kernel/smp.c

Lines changed: 48 additions & 0 deletions
@@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu)
 {
 	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }
+
+/*
+ * Performs an icache flush for the given MM context.  RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+	unsigned int cpu;
+	cpumask_t others, *mask;
+
+	preempt_disable();
+
+	/* Mark every hart's icache as needing a flush for this MM. */
+	mask = &mm->context.icache_stale_mask;
+	cpumask_setall(mask);
+	/* Flush this hart's I$ now, and mark it as flushed. */
+	cpu = smp_processor_id();
+	cpumask_clear_cpu(cpu, mask);
+	local_flush_icache_all();
+
+	/*
+	 * Flush the I$ of other harts concurrently executing, and mark them as
+	 * flushed.
+	 */
+	cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+	local |= cpumask_empty(&others);
+	if (mm != current->active_mm || !local)
+		sbi_remote_fence_i(others.bits);
+	else {
+		/*
+		 * It's assumed that at least one strongly ordered operation is
+		 * performed on this hart between setting a hart's cpumask bit
+		 * and scheduling this MM context on that hart.  Sending an SBI
+		 * remote message will do this, but in the case where no
+		 * messages are sent we still need to order this hart's writes
+		 * with flush_icache_deferred().
+		 */
+		smp_mb();
+	}
+
+	preempt_enable();
+}

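Taken together, flush_icache_mm() above and flush_icache_deferred() in mmu_context.h form a small protocol. The sketch below is a standalone model of it (invented model_* names, plain bitmasks instead of cpumask_t, with the 'local' hint and memory barriers simplified away), showing why a process confined to one hart never generates IPIs:

/* icache_model.c: model of the deferred icache-flush protocol. */
#include <stdio.h>

#define NR_CPUS 4

static unsigned int stale_mask;	/* models mm->context.icache_stale_mask */
static unsigned int mm_mask;	/* models mm_cpumask(mm) */

/* Models flush_icache_mm(): a dirty page was just made executable. */
static void model_flush_icache_mm(int cpu)
{
	stale_mask = (1u << NR_CPUS) - 1;	/* cpumask_setall() */
	stale_mask &= ~(1u << cpu);		/* local hart flushes now... */
	printf("cpu%d: fence.i\n", cpu);	/* ...local_flush_icache_all() */

	unsigned int others = mm_mask & ~(1u << cpu);
	if (others)				/* sbi_remote_fence_i(others) */
		printf("cpu%d: IPI fence.i to mask %#x\n", cpu, others);
}

/* Models switch_mm() + flush_icache_deferred() on the scheduling hart. */
static void model_switch_mm(int cpu)
{
	mm_mask |= 1u << cpu;			/* cpumask_set_cpu() */
	if (stale_mask & (1u << cpu)) {
		stale_mask &= ~(1u << cpu);
		printf("cpu%d: deferred fence.i before user code\n", cpu);
	}
}

int main(void)
{
	model_switch_mm(0);		/* single-hart process on hart 0 */
	model_flush_icache_mm(0);	/* page made executable: no IPI sent */
	model_switch_mm(2);		/* later migration pays one fence.i */
	return 0;
}

Run, this prints one local fence.i for hart 0 and no IPI (no other hart is running the mm), then a deferred flush when the process migrates to hart 2 — the 'make -j' fast path the comments describe.
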
arch/riscv/mm/Makefile

Lines changed: 1 addition & 0 deletions
@@ -2,3 +2,4 @@ obj-y += init.o
 obj-y += fault.o
 obj-y += extable.o
 obj-y += ioremap.o
+obj-y += cacheflush.o

arch/riscv/mm/cacheflush.c

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+
+void flush_icache_pte(pte_t pte)
+{
+	struct page *page = pte_page(pte);
+
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		flush_icache_all();
+}

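The PG_dcache_clean bit is doing quiet but important work here: when set it effectively means "the instruction cache is known to be in sync with this page's contents". flush_dcache_page() (cacheflush.h above) clears the bit whenever the kernel writes to the page, and flush_icache_pte() flushes only on the clear-to-set transition, so mapping an unchanged page executable repeatedly costs one flush rather than one per mapping. A standalone model of the handshake (invented model_* names; one page, no concurrency):

/* pgbit_model.c: model of the PG_dcache_clean handshake. */
#include <stdbool.h>
#include <stdio.h>

static bool pg_dcache_clean;	/* models the bit in page->flags */
static int flushes;

static void model_flush_dcache_page(void)	/* kernel wrote the page */
{
	pg_dcache_clean = false;		/* clear_bit() */
}

static void model_flush_icache_pte(void)	/* page mapped executable */
{
	if (!pg_dcache_clean) {			/* !test_and_set_bit() */
		pg_dcache_clean = true;
		flushes++;			/* flush_icache_all() */
	}
}

int main(void)
{
	model_flush_dcache_page();	/* page dirtied */
	model_flush_icache_pte();	/* first exec mapping: flush */
	model_flush_icache_pte();	/* remapped, unchanged: free */
	model_flush_dcache_page();	/* dirtied again */
	model_flush_icache_pte();	/* next exec mapping: flush */
	printf("icache flushes: %d\n", flushes);	/* 2 */
	return 0;
}
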