
Commit e02e07e

chenhuacai authored and paulburton committed
MIPS: Loongson: Introduce and use loongson_llsc_mb()
On the Loongson-2G/2H/3A/3B there is a hardware flaw: ll/sc and lld/scd are very weakly ordered. We should add sync instructions "before each ll/lld" and "at the branch-target between ll/sc" to work around it. Otherwise, this flaw occasionally causes deadlock (e.g. when running heavy load tests with LTP).

Below is the explanation from the CPU designer:

"For the Loongson 3 family, when a memory access instruction (load, store, or prefetch) executes between the execution of LL and SC, the success or failure of SC is not predictable. Although a programmer would not insert memory access instructions between LL and SC, memory instructions before LL in program order may be dynamically executed between LL and SC, so a memory fence (SYNC) is needed before LL/LLD to avoid this situation.

Since Loongson-3A R2 (3A2000), we have improved our hardware design to handle this case. But we later deduced a rare circumstance in which some memory instructions, speculatively executed due to branch misprediction between LL/SC, still fall into the above case, so a memory fence (SYNC) at the branch target (if the target is not between LL/SC) is needed for Loongson 3A1000, 3B1500, 3A2000 and 3A3000. Our processor is continually evolving and we aim to remove all these workaround SYNCs around LL/SC for upcoming processors."

Here is an example: cpu1 and cpu2 simultaneously run atomic_add by 1 on the same atomic variable. This bug can cause the 'sc' run by both CPUs (in atomic_add) to succeed at the same time ('sc' returns 1), so the variable is sometimes only *incremented by 1*, which is wrong and unacceptable (it should be incremented by 2).

Why disable fix-loongson3-llsc in the compiler? Because the compiler fix would cause problems in the kernel's __ex_table section.

This patch fixes all the cases in the kernel, but:

+. The fix at the end of futex_atomic_cmpxchg_inatomic is for the branch target of 'bne'; in the other cases, such as atomic_sub_if_positive/cmpxchg/xchg, smp_mb__before_llsc() and smp_llsc_mb() happen to cover both the ll and the branch target.

+. Loongson 3 does not support CONFIG_EDAC_ATOMIC_SCRUB, so there is no need to touch edac.h.

+. local_ops and cmpxchg_local should not be affected by this bug since only the owner can write.

+. mips_atomic_set in syscall.c is deprecated and rarely used, so just let it go.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Huang Pei <huangpei@loongson.cn>
[paul.burton@mips.com:
  - Simplify the addition of -mno-fix-loongson3-llsc to cflags, and add
    a comment describing why it's there.
  - Make loongson_llsc_mb() a no-op when
    CONFIG_CPU_LOONGSON3_WORKAROUNDS=n, rather than a compiler memory
    barrier.
  - Add a comment describing the bug & how loongson_llsc_mb() helps in
    asm/barrier.h.]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: ambrosehua@gmail.com
Cc: Steven J. Hill <Steven.Hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: Li Xuefeng <lixuefeng@loongson.cn>
Cc: Xu Chenghua <xuchenghua@loongson.cn>
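To make the failure concrete, here is a minimal sketch of the patched atomic_add ll/sc loop, modeled on the atomic.h hunk below (register names are illustrative; branch delay slots are left to the assembler):

	sync			# loongson_llsc_mb(): stop earlier memory accesses
				# from executing between the ll and the sc
1:	ll	t0, 0(a0)	# load-linked v->counter
	addu	t0, t0, a1	# add i
	sc	t0, 0(a0)	# store-conditional; t0 = 1 on success, 0 on failure
	beqz	t0, 1b		# retry if the sc failed

Without the leading sync, two CPUs executing this loop at the same time can both observe their sc succeed, and one of the two increments is lost.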
1 parent 67fc5dc commit e02e07e

File tree: 8 files changed, +100 −0 lines

arch/mips/Kconfig

Lines changed: 15 additions & 0 deletions

@@ -1403,6 +1403,21 @@ config LOONGSON3_ENHANCEMENT
 	  please say 'N' here. If you want a high-performance kernel to run on
 	  new Loongson 3 machines only, please say 'Y' here.
 
+config CPU_LOONGSON3_WORKAROUNDS
+	bool "Old Loongson 3 LLSC Workarounds"
+	default y if SMP
+	depends on CPU_LOONGSON3
+	help
+	  Loongson 3 processors have the llsc issues which require workarounds.
+	  Without workarounds the system may hang unexpectedly.
+
+	  Newer Loongson 3 will fix these issues and no workarounds are needed.
+	  The workarounds have no significant side effect on them but may
+	  decrease the performance of the system so this option should be
+	  disabled unless the kernel is intended to be run on old systems.
+
+	  If unsure, please say Y.
+
 config CPU_LOONGSON2E
 	bool "Loongson 2E"
 	depends on SYS_HAS_CPU_LOONGSON2E

arch/mips/include/asm/atomic.h

Lines changed: 6 additions & 0 deletions

@@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)	\
 	if (kernel_uses_llsc) {						\
 		int temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -85,6 +86,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
 	if (kernel_uses_llsc) {						\
 		int temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -118,6 +120,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
 	if (kernel_uses_llsc) {						\
 		int temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -256,6 +259,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)	\
 	if (kernel_uses_llsc) {						\
 		long temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -283,6 +287,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
 	if (kernel_uses_llsc) {						\
 		long temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -316,6 +321,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
 	if (kernel_uses_llsc) {						\
 		long temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\

arch/mips/include/asm/barrier.h

Lines changed: 36 additions & 0 deletions

@@ -222,6 +222,42 @@
 #define __smp_mb__before_atomic()	__smp_mb__before_llsc()
 #define __smp_mb__after_atomic()	smp_llsc_mb()
 
+/*
+ * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
+ * store or pref) in between an ll & sc can cause the sc instruction to
+ * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
+ * containing such sequences, this bug bites harder than we might otherwise
+ * expect due to reordering & speculation:
+ *
+ * 1) A memory access appearing prior to the ll in program order may actually
+ *    be executed after the ll - this is the reordering case.
+ *
+ *    In order to avoid this we need to place a memory barrier (ie. a sync
+ *    instruction) prior to every ll instruction, in between it & any earlier
+ *    memory access instructions. Many of these cases are already covered by
+ *    smp_mb__before_llsc() but for the remaining cases, typically ones in
+ *    which multiple CPUs may operate on a memory location but ordering is not
+ *    usually guaranteed, we use loongson_llsc_mb() below.
+ *
+ *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
+ *
+ * 2) If a conditional branch exists between an ll & sc with a target outside
+ *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ *    or similar, then misprediction of the branch may allow speculative
+ *    execution of memory accesses from outside of the ll-sc loop.
+ *
+ *    In order to avoid this we need a memory barrier (ie. a sync instruction)
+ *    at each affected branch target, for which we also use loongson_llsc_mb()
+ *    defined below.
+ *
+ *    This case affects all current Loongson 3 CPUs.
+ */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#else
+#define loongson_llsc_mb()	do { } while (0)
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASM_BARRIER_H */
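As a usage sketch (not part of this patch), here is how the two fence placements described in the comment combine in a cmpxchg-style loop when the workaround is enabled and loongson_llsc_mb() expands to a sync; labels and registers are illustrative, and branch delay slots are left to the assembler:

	sync			# case 1: fence before the ll
1:	ll	t0, 0(a0)	# load-linked the current value
	bne	t0, a1, 2f	# mismatch: branch out of the ll/sc loop
	move	t1, a2		# value to store
	sc	t1, 0(a0)	# store-conditional the new value
	beqz	t1, 1b		# retry on sc failure
2:	sync			# case 2: fence at the out-of-loop branch target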

arch/mips/include/asm/bitops.h

Lines changed: 5 additions & 0 deletions

@@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# set_bit	\n"
@@ -79,6 +80,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 		} while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -123,6 +125,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (~(1UL << bit)));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# clear_bit	\n"
@@ -133,6 +136,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 		} while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -193,6 +197,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"

arch/mips/include/asm/futex.h

Lines changed: 3 additions & 0 deletions

@@ -50,6 +50,7 @@
 		  "i" (-EFAULT)						\
 		: "memory");						\
 	} else if (cpu_has_llsc) {					\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -163,6 +164,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		  "i" (-EFAULT)
 		: "memory");
 	} else if (cpu_has_llsc) {
+		loongson_llsc_mb();
 		__asm__ __volatile__(
 		"# futex_atomic_cmpxchg_inatomic			\n"
 		"	.set	push					\n"
@@ -192,6 +194,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
 		  "i" (-EFAULT)
 		: "memory");
+		loongson_llsc_mb();
 	} else
 		return -ENOSYS;
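The loongson_llsc_mb() added after the asm block is the branch-target fence called out in the commit message: futex_atomic_cmpxchg_inatomic's 'bne' jumps past the sc on value mismatch, so the first instruction following the block is a branch target reached from inside the ll/sc sequence. Schematically (labels and registers are illustrative):

1:	ll	val, (uaddr)	# load-linked the futex word
	bne	val, oldval, 3f	# mismatch: leave the ll/sc loop
	move	tmp, newval
2:	sc	tmp, (uaddr)	# try to store newval
	beqz	tmp, 1b		# sc failed: retry
3:	sync			# <- the trailing loongson_llsc_mb() lands here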

arch/mips/include/asm/pgtable.h

Lines changed: 2 additions & 0 deletions

@@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 			: [global] "r" (page_global));
 	} else if (kernel_uses_llsc) {
+		loongson_llsc_mb();
 		__asm__ __volatile__ (
 		"	.set	push					\n"
 		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
@@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 		"	.set	pop					\n"
 		: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 		: [global] "r" (page_global));
+		loongson_llsc_mb();
 	}
 #else /* !CONFIG_SMP */
 	if (pte_none(*buddy))

arch/mips/loongson64/Platform

Lines changed: 23 additions & 0 deletions

@@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
 endif
 
 cflags-$(CONFIG_CPU_LOONGSON3)	+= -Wa,--trap
+
+#
+# Some versions of binutils, not currently mainline as of 2019/02/04, support
+# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction
+# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a
+# description).
+#
+# We disable this in order to prevent the assembler meddling with the
+# instruction that labels refer to, ie. if we label an ll instruction:
+#
+# 1: ll v0, 0(a0)
+#
+# ...then with the assembler fix applied the label may actually point at a sync
+# instruction inserted by the assembler, and if we were using the label in an
+# exception table the table would no longer contain the address of the ll
+# instruction.
+#
+# Avoid this by explicitly disabling that assembler behaviour. If upstream
+# binutils does not merge support for the flag then we can revisit & remove
+# this later - for now it ensures vendor toolchains don't cause problems.
+#
+cflags-$(CONFIG_CPU_LOONGSON3)	+= $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
+
 #
 # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
 # as MIPS64 R2; older versions as just R1. This leaves the possibility open
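To see why the assembler fix would break exception tables, consider a schematic of the pattern the comment alludes to (the real kernel code builds the entry via macros; 'fixup' is a hypothetical label):

1:	ll	v0, 0(a0)	# may fault on a bad user address
	...
	.section __ex_table, "a"
	PTR	1b, fixup	# entry: fault at 1b -> continue at fixup
	.previous

An __ex_table entry pairs the address of an instruction that may fault with the address of its fixup code. If the assembler silently rewrites '1: ll v0, 0(a0)' into a sync followed by the ll, label 1 can end up on the sync, and the table entry no longer names the instruction that actually faults.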

arch/mips/mm/tlbex.c

Lines changed: 10 additions & 0 deletions

@@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 		 * to mimic that here by taking a load/istream page
 		 * fault.
 		 */
+		if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+			uasm_i_sync(p, 0);
 		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
 		uasm_i_jr(p, ptr);
 
@@ -1646,6 +1648,8 @@ static void
 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
 {
 #ifdef CONFIG_SMP
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(p, 0);
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
 	if (cpu_has_64bits)
 		uasm_i_lld(p, pte, 0, ptr);
@@ -2259,6 +2263,8 @@ static void build_r4000_tlb_load_handler(void)
 #endif
 
 	uasm_l_nopage_tlbl(&l, p);
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2313,6 +2319,8 @@ static void build_r4000_tlb_store_handler(void)
 #endif
 
 	uasm_l_nopage_tlbs(&l, p);
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2368,6 +2376,8 @@ static void build_r4000_tlb_modify_handler(void)
 #endif
 
 	uasm_l_nopage_tlbm(&l, p);
+	if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
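These TLB handlers are generated at runtime with uasm rather than written in C, so loongson_llsc_mb() cannot be used; the generator emits the sync (stype 0) itself. With the workaround enabled, the SMP PTE update built around iPTE_LW comes out roughly as follows (64-bit case; registers illustrative), and the syncs placed at the uasm_l_nopage_* labels are the case-2 fences for branches that exit this loop:

	sync			# from uasm_i_sync(p, 0)
	lld	k0, 0(k1)	# load-linked the PTE (iPTE_LW)
	...			# test & modify the PTE
	scd	k0, 0(k1)	# store-conditional the updated PTE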
