Commit 3032f0c
ARCv2: spinlock: remove the extra smp_mb before lock, after unlock
- ARCv2 LLSC spinlocks have smp_mb() both before and after the LLSC
  instructions, which is not required per lkmm ACQ/REL semantics:
  smp_mb() is only needed _after_ lock and _before_ unlock, so remove
  the extra barriers.
  The reason they were there was mainly historical. At the time of
  initial SMP Linux bringup on HS38 cores, I was too conservative,
  given the fluidity of both hw and sw. The last attempt to ditch the
  extra barrier showed some hackbench regression, which is apparently
  not the case now (at least for the LLSC case, read on...)

- EX based spinlocks (!CONFIG_ARC_HAS_LLSC) still need the extra
  smp_mb(), not due to lkmm, but due to some hardware shenanigans.
  W/o that, hackbench triggers an RCU stall splat, so the extra DMB is
  retained. !LLSC based systems are not realistic Linux systems anyway,
  so they can afford to be a bit suboptimal ;-)

  | [ARCLinux]# for i in (seq 1 1 5) ; do hackbench; done
  | Running with 10 groups 400 process
  | INFO: task hackbench:158 blocked for more than 10 seconds.
  |       Not tainted 4.20.0-00005-g96b18288a88e-dirty #117
  | "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  | hackbench       D    0   158    135 0x00000000
  |
  | Stack Trace:
  | watchdog: BUG: soft lockup - CPU#3 stuck for 59s! [hackbench:469]
  | Modules linked in:
  | Path: (null)
  | CPU: 3 PID: 469 Comm: hackbench Not tainted 4.20.0-00005-g96b18288a88e-dirty
  |
  | [ECR   ]: 0x00000000 => Check Programmer's Manual
  | [EFA   ]: 0x00000000
  | [BLINK ]: do_exit+0x4a6/0x7d0
  | [ERET  ]: _raw_write_unlock_irq+0x44/0x5c

- And while at it, remove the extra smp_mb() from the EX based
  arch_read_trylock(), since the spin lock there guarantees a full
  barrier anyway.

- For the LLSC case, hackbench thread runs improve with this patch
  (HAPS @ 50MHz)

  ---- before ----
  |
  | [ARCLinux]# for i in 1 2 3 4 5; do hackbench 10 thread; done
  | Running with 10 groups 400 threads
  | Time: 16.253
  | Time: 16.445
  | Time: 16.590
  | Time: 16.721
  | Time: 16.544

  ---- after ----
  |
  | [ARCLinux]# for i in 1 2 3 4 5; do hackbench 10 thread; done
  | Running with 10 groups 400 threads
  | Time: 15.638
  | Time: 15.730
  | Time: 15.870
  | Time: 15.842
  | Time: 15.729

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
1 parent 6dd356d commit 3032f0c
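To make the barrier placement described in the commit message concrete, here is a small standalone C11 sketch of the same pattern (not the kernel code: sketch_spinlock_t and the function names are made up, and a CAS loop stands in for the LLOCK/SCOND loop). The acquire loop itself carries no ordering, a full fence follows a successful lock, and a full fence precedes the unlock store.

  #include <stdatomic.h>

  typedef struct { atomic_int slock; } sketch_spinlock_t;   /* hypothetical stand-in type */

  static void sketch_spin_lock(sketch_spinlock_t *l)
  {
      int expected;

      /* test-and-set retry loop; relaxed, so it imposes no ordering by itself */
      do {
          expected = 0;
      } while (!atomic_compare_exchange_weak_explicit(&l->slock, &expected, 1,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed));

      /* plays the role of the single smp_mb() _after_ lock (ACQUIRE) */
      atomic_thread_fence(memory_order_seq_cst);
  }

  static void sketch_spin_unlock(sketch_spinlock_t *l)
  {
      /* plays the role of the single smp_mb() _before_ unlock (RELEASE) */
      atomic_thread_fence(memory_order_seq_cst);

      /* plain relaxed store, like the WRITE_ONCE() in the diff below */
      atomic_store_explicit(&l->slock, 0, memory_order_relaxed);
  }

The point mirrors the LLSC hunks below: no fence is needed before taking the lock or after releasing it, because later accesses are allowed to leak into the critical section, only not out of it.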


arch/arc/include/asm/spinlock.h

Lines changed: 14 additions & 35 deletions
@@ -21,8 +21,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned int val;
 
-	smp_mb();
-
 	__asm__ __volatile__(
 	"1:	llock	%[val], [%[slock]]	\n"
 	"	breq	%[val], %[LOCKED], 1b	\n"	/* spin while LOCKED */
@@ -34,6 +32,14 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
 	: "memory", "cc");
 
+	/*
+	 * ACQUIRE barrier to ensure load/store after taking the lock
+	 * don't "bleed-up" out of the critical section (leak-in is allowed)
+	 * http://www.spinics.net/lists/kernel/msg2010409.html
+	 *
+	 * ARCv2 only has load-load, store-store and all-all barrier
+	 * thus need the full all-all barrier
+	 */
 	smp_mb();
 }
 
@@ -42,8 +48,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned int val, got_it = 0;
 
-	smp_mb();
-
 	__asm__ __volatile__(
 	"1:	llock	%[val], [%[slock]]	\n"
 	"	breq	%[val], %[LOCKED], 4f	\n"	/* already LOCKED, just bail */
@@ -67,9 +71,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	smp_mb();
 
-	lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
-
-	smp_mb();
+	WRITE_ONCE(lock->slock, __ARCH_SPIN_LOCK_UNLOCKED__);
 }
 
 /*
@@ -81,8 +83,6 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned int val;
 
-	smp_mb();
-
 	/*
 	 * zero means writer holds the lock exclusively, deny Reader.
 	 * Otherwise grant lock to first/subseq reader
@@ -113,8 +113,6 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned int val, got_it = 0;
 
-	smp_mb();
-
 	__asm__ __volatile__(
 	"1:	llock	%[val], [%[rwlock]]	\n"
 	"	brls	%[val], %[WR_LOCKED], 4f\n"	/* <= 0: already write locked, bail */
@@ -140,8 +138,6 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned int val;
 
-	smp_mb();
-
 	/*
 	 * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
 	 * deny writer. Otherwise if unlocked grant to writer
@@ -175,8 +171,6 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned int val, got_it = 0;
 
-	smp_mb();
-
 	__asm__ __volatile__(
 	"1:	llock	%[val], [%[rwlock]]	\n"
 	"	brne	%[val], %[UNLOCKED], 4f	\n"	/* !UNLOCKED, bail */
@@ -217,17 +211,13 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 	: [val]		"=&r"	(val)
 	: [rwlock]	"r"	(&(rw->counter))
 	: "memory", "cc");
-
-	smp_mb();
 }
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	smp_mb();
 
-	rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
-
-	smp_mb();
+	WRITE_ONCE(rw->counter, __ARCH_RW_LOCK_UNLOCKED__);
 }
 
 #else	/* !CONFIG_ARC_HAS_LLSC */
@@ -237,10 +227,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
 
 	/*
-	 * This smp_mb() is technically superfluous, we only need the one
-	 * after the lock for providing the ACQUIRE semantics.
-	 * However doing the "right" thing was regressing hackbench
-	 * so keeping this, pending further investigation
+	 * Per lkmm, smp_mb() is only required after _lock (and before_unlock)
+	 * for ACQ and REL semantics respectively. However EX based spinlocks
+	 * need the extra smp_mb to workaround a hardware quirk.
 	 */
 	smp_mb();
 
@@ -257,14 +246,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 #endif
 	: "memory");
 
-	/*
-	 * ACQUIRE barrier to ensure load/store after taking the lock
-	 * don't "bleed-up" out of the critical section (leak-in is allowed)
-	 * http://www.spinics.net/lists/kernel/msg2010409.html
-	 *
-	 * ARCv2 only has load-load, store-store and all-all barrier
-	 * thus need the full all-all barrier
-	 */
 	smp_mb();
 }
 
@@ -309,8 +290,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	: "memory");
 
 	/*
-	 * superfluous, but keeping for now - see pairing version in
-	 * arch_spin_lock above
+	 * see pairing version/comment in arch_spin_lock above
 	 */
 	smp_mb();
 }
@@ -344,7 +324,6 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	arch_spin_unlock(&(rw->lock_mutex));
 	local_irq_restore(flags);
 
-	smp_mb();
 	return ret;
 }
 

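For contrast, a hedged sketch of what the EX based (!CONFIG_ARC_HAS_LLSC) arch_spin_lock keeps doing after this patch, per the "hardware quirk" comment in the hunks above: the leading full barrier is retained even though lkmm does not require it, and the trailing ACQUIRE barrier stays as before. This reuses the made-up sketch_spinlock_t type from the earlier sketch, with a CAS loop standing in for the real EX instruction sequence.

  static void sketch_ex_spin_lock(sketch_spinlock_t *l)
  {
      /* extra barrier: not required by lkmm, retained to work around a hw quirk */
      atomic_thread_fence(memory_order_seq_cst);

      int expected;
      do {
          expected = 0;
      } while (!atomic_compare_exchange_weak_explicit(&l->slock, &expected, 1,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed));

      /* required ACQUIRE barrier, same as the LLSC flavour */
      atomic_thread_fence(memory_order_seq_cst);
  }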