
Commit d6abfdb

ktraghavendra authored and Ingo Molnar committed
x86/spinlocks/paravirt: Fix memory corruption on unlock
Paravirt spinlocks clear the slowpath flag after doing the unlock. As
explained by Linus, the current code does:

	prev = *lock;
	add_smp(&lock->tickets.head, TICKET_LOCK_INC);

	/* add_smp() is a full mb() */

	if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
		__ticket_unlock_slowpath(lock, prev);

which is *exactly* the kind of thing you cannot do with spinlocks: once
you have done the "add_smp()" and released the spinlock for the fast
path, you cannot access the spinlock any more, precisely because a
fast-path lock might come in and release the whole data structure.

Linus suggested that we should not do any writes to the lock after
unlock(), and that we can move the slowpath clearing to the fast-path
lock. So this patch implements the fix with:

 1. Moving the slowpath flag to head (Oleg):
    Unlocked locks don't care about the slowpath flag; therefore we can
    keep it set after the last unlock and clear it again on the first
    (try)lock -- this removes the write after unlock. Note that a stale
    slowpath flag can still cause unnecessary kicks. By moving the
    slowpath flag from the tail to the head ticket we also avoid the
    need to access both the head and tail tickets on unlock.

 2. Using xadd to avoid a read/write after unlock when checking the
    need for unlock_kick (Linus):
    We further avoid the need for a read-after-release by using xadd;
    the previous head value will include the slowpath flag and indicate
    whether we need to do PV kicking of suspended spinners -- on modern
    chips xadd isn't (much) more expensive than an add + load.

Result:
 setup: 16-core (32 cpu + HT) Sandy Bridge, 8GB, 16-vcpu guest

 benchmark   overcommit   %improve
 kernbench   1x           -0.13
 kernbench   2x            0.02
 dbench      1x           -1.77
 dbench      2x           -0.63

[Jeremy: Hinted missing TICKET_LOCK_INC for kick]
[Oleg: Moved slowpath flag to head, ticket_equals idea]
[PeterZ: Added detailed changelog]

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Jones <drjones@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Fernando Luis Vázquez Cao <fernando_b1@lab.ntt.co.jp>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: a.ryabinin@samsung.com
Cc: dave@stgolabs.net
Cc: hpa@zytor.com
Cc: jasowang@redhat.com
Cc: jeremy@goop.org
Cc: paul.gortmaker@windriver.com
Cc: riel@redhat.com
Cc: tglx@linutronix.de
Cc: waiman.long@hp.com
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20150215173043.GA7471@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
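
Editor's note: to make the ticket arithmetic above concrete, here is a minimal
user-space sketch (not the kernel code) of the two ideas in the changelog:
tickets compare equal modulo the slowpath bit, and the previous head returned
by xadd() is enough to decide whether a kick is needed. The 8-bit ticket type
and the TICKET_SLOWPATH_FLAG / TICKET_LOCK_INC values are illustrative
assumptions (flag in bit 0 of the head ticket, tickets counted in steps of 2,
as in the paravirt case).

/*
 * Minimal sketch of the patch's ticket arithmetic; constants and the ticket
 * width are assumptions, not the kernel's definitions.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint8_t __ticket_t;

#define TICKET_SLOWPATH_FLAG	((__ticket_t)1)
#define TICKET_LOCK_INC		((__ticket_t)2)

/* Same test as the patch: compare tickets while ignoring the slowpath bit. */
static int __tickets_equal(__ticket_t one, __ticket_t two)
{
	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
}

int main(void)
{
	/*
	 * Lock held by ticket 4; a waiter with ticket 6 has gone to sleep, so
	 * the slowpath flag is set in head (tail itself never carries it).
	 */
	__ticket_t head = 4 | TICKET_SLOWPATH_FLAG;
	__ticket_t want = 6;			/* the sleeping waiter's ticket */

	/* Unlock: xadd() returns the previous head; no read of *lock afterwards. */
	__ticket_t prev = head;			/* what xadd() would hand back */
	head = (__ticket_t)(head + TICKET_LOCK_INC);

	if (prev & TICKET_SLOWPATH_FLAG) {
		prev &= (__ticket_t)~TICKET_SLOWPATH_FLAG;
		printf("kick the waiter holding ticket %u\n",
		       (unsigned)(prev + TICKET_LOCK_INC));	/* ticket 6 */
	}

	/* The woken waiter compares modulo the flag: head = 7 matches want = 6. */
	assert(__tickets_equal(head, want));
	return 0;
}
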
1 parent 8d1e5a1 commit d6abfdb

File tree

3 files changed, +64 -56 lines changed


arch/x86/include/asm/spinlock.h

Lines changed: 46 additions & 48 deletions
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+							__ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,75 +107,52 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	arch_spinlock_t old, new;
 
 	old.tickets = READ_ONCE(lock->tickets);
-	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
 		return 0;
 
 	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
-					    arch_spinlock_t old)
-{
-	arch_spinlock_t new;
-
-	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-	/* Perform the unlock on the "before" copy */
-	old.tickets.head += TICKET_LOCK_INC;
-
-	/* Clear the slowpath flag */
-	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
-	/*
-	 * If the lock is uncontended, clear the flag - use cmpxchg in
-	 * case it changes behind our back though.
-	 */
-	if (new.tickets.head != new.tickets.tail ||
-	    cmpxchg(&lock->head_tail, old.head_tail,
-					new.head_tail) != old.head_tail) {
-		/*
-		 * Lock still has someone queued for it, so wake up an
-		 * appropriate waiter.
-		 */
-		__ticket_unlock_kick(lock, old.tickets.head);
-	}
-}
-
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	if (TICKET_SLOWPATH_FLAG &&
-	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		arch_spinlock_t prev;
+		static_key_false(&paravirt_ticketlocks_enabled)) {
+		__ticket_t head;
 
-		prev = *lock;
-		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
 
-		/* add_smp() is a full mb() */
+		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
 
-		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
-			__ticket_unlock_slowpath(lock, prev);
+		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+			head &= ~TICKET_SLOWPATH_FLAG;
+			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+		}
 	} else
 		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
 }
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return tmp.tail != tmp.head;
+	return !__tickets_equal(tmp.tail, tmp.head);
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	tmp.head &= ~TICKET_SLOWPATH_FLAG;
+	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
@@ -183,16 +181,16 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	__ticket_t head = ACCESS_ONCE(lock->tickets.head);
+	__ticket_t head = READ_ONCE(lock->tickets.head);
 
 	for (;;) {
-		struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+		struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 		/*
 		 * We need to check "unlocked" in a loop, tmp.head == head
		 * can be false positive because of overflow.
		 */
-		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
-							tmp.head != head)
+		if (__tickets_equal(tmp.head, tmp.tail) ||
+				!__tickets_equal(tmp.head, head))
 			break;
 
 		cpu_relax();

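Editor's note: the following user-space sketch (an assumption-laden
illustration, not the kernel source) shows why the new fast-path clearing in
__ticket_check_and_clear_slowpath() can only succeed when the holder is alone:
the expected value of the cmpxchg encodes tail == cleared head +
TICKET_LOCK_INC. C11 atomics stand in for cmpxchg(); the 8-bit tickets and the
head-in-low-byte layout of head_tail are assumptions made for the demo.

/*
 * Sketch of clearing a stale slowpath flag with one compare-and-swap on the
 * combined head/tail word; layout and constants are illustrative assumptions.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef uint8_t  __ticket_t;
typedef uint16_t __ticketpair_t;

#define TICKET_SLOWPATH_FLAG	((__ticket_t)1)
#define TICKET_LOCK_INC		((__ticket_t)2)
#define TICKET_SHIFT		8	/* head in the low byte, tail in the high byte */

static __ticketpair_t pack(__ticket_t head, __ticket_t tail)
{
	return (__ticketpair_t)(head | (tail << TICKET_SHIFT));
}

static void check_and_clear_slowpath(_Atomic __ticketpair_t *lock, __ticket_t head)
{
	if (head & TICKET_SLOWPATH_FLAG) {
		__ticket_t clean = head & (__ticket_t)~TICKET_SLOWPATH_FLAG;
		/* Expected: flagged head and a tail saying we are the only holder. */
		__ticketpair_t old = pack(head, (__ticket_t)(clean + TICKET_LOCK_INC));
		__ticketpair_t new = pack(clean, (__ticket_t)(clean + TICKET_LOCK_INC));

		/* Fails harmlessly if another CPU queued up in the meantime. */
		atomic_compare_exchange_strong(lock, &old, new);
	}
}

int main(void)
{
	/* Lock just taken (ticket 4) with a stale slowpath flag, no other waiters. */
	_Atomic __ticketpair_t lock = pack(4 | TICKET_SLOWPATH_FLAG, 4 + TICKET_LOCK_INC);

	check_and_clear_slowpath(&lock, 4 | TICKET_SLOWPATH_FLAG);
	printf("lock word after clear: %#x\n", (unsigned)atomic_load(&lock));	/* 0x604 */
	return 0;
}
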
arch/x86/kernel/kvm.c

Lines changed: 9 additions & 4 deletions
@@ -609,7 +609,7 @@ static inline void check_zero(void)
 	u8 ret;
 	u8 old;
 
-	old = ACCESS_ONCE(zero_stats);
+	old = READ_ONCE(zero_stats);
 	if (unlikely(old)) {
 		ret = cmpxchg(&zero_stats, old, 0);
 		/* This ensures only one fellow resets the stat */
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	int cpu;
 	u64 start;
 	unsigned long flags;
+	__ticket_t head;
 
 	if (in_nmi())
 		return;
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	 */
 	__ticket_enter_slowpath(lock);
 
+	/* make sure enter_slowpath, which is atomic does not cross the read */
+	smp_mb__after_atomic();
+
 	/*
 	 * check again make sure it didn't become free while
 	 * we weren't looking.
 	 */
-	if (ACCESS_ONCE(lock->tickets.head) == want) {
+	head = READ_ONCE(lock->tickets.head);
+	if (__tickets_equal(head, want)) {
 		add_stats(TAKEN_SLOW_PICKUP, 1);
 		goto out;
 	}
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
 	add_stats(RELEASED_SLOW, 1);
 	for_each_cpu(cpu, &waiting_cpus) {
 		const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
-		if (ACCESS_ONCE(w->lock) == lock &&
-		    ACCESS_ONCE(w->want) == ticket) {
+		if (READ_ONCE(w->lock) == lock &&
+		    READ_ONCE(w->want) == ticket) {
 			add_stats(RELEASED_SLOW_KICKED, 1);
 			kvm_kick_cpu(cpu);
 			break;

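Editor's note: the smp_mb__after_atomic() added above (and in the Xen file
below) is the subtle part of the slowpath changes. Here is a schematic
user-space rendering of the waiter-side pattern, with C11 atomics standing in
for set_bit() and the kernel barrier; the types, values, and function names are
illustrative assumptions, not the kernel's.

/*
 * Sketch: announce the slowpath, full fence, then re-check the lock. Without
 * the fence the unlocker could miss the flag while the waiter misses the
 * unlock, and the waiter would sleep forever.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint8_t __ticket_t;

#define TICKET_SLOWPATH_FLAG	((__ticket_t)1)

static _Atomic __ticket_t head;		/* stand-in for lock->tickets.head */

static int __tickets_equal(__ticket_t one, __ticket_t two)
{
	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
}

/* Waiter: set the slowpath bit, then re-check that the lock is still taken. */
static bool must_block(__ticket_t want)
{
	atomic_fetch_or(&head, TICKET_SLOWPATH_FLAG);	/* __ticket_enter_slowpath() */

	/*
	 * Full fence, mirroring smp_mb__after_atomic(): in the kernel's memory
	 * model set_bit() does not imply a barrier, so the bit set above must
	 * be explicitly ordered before the read below.
	 */
	atomic_thread_fence(memory_order_seq_cst);

	/* If the lock became free in the meantime, do not go to sleep. */
	return !__tickets_equal(atomic_load_explicit(&head, memory_order_relaxed), want);
}

int main(void)
{
	atomic_store(&head, 4);		/* lock currently held by ticket 4 */
	assert(must_block(6));		/* we hold ticket 6, so we must block */
	return 0;
}
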
arch/x86/xen/spinlock.c

Lines changed: 9 additions & 4 deletions
@@ -41,7 +41,7 @@ static u8 zero_stats;
 static inline void check_zero(void)
 {
 	u8 ret;
-	u8 old = ACCESS_ONCE(zero_stats);
+	u8 old = READ_ONCE(zero_stats);
 	if (unlikely(old)) {
 		ret = cmpxchg(&zero_stats, old, 0);
 		/* This ensures only one fellow resets the stat */
@@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
 	int cpu = smp_processor_id();
 	u64 start;
+	__ticket_t head;
 	unsigned long flags;
 
 	/* If kicker interrupts not initialized yet, just spin */
@@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	 */
 	__ticket_enter_slowpath(lock);
 
+	/* make sure enter_slowpath, which is atomic does not cross the read */
+	smp_mb__after_atomic();
+
 	/*
 	 * check again make sure it didn't become free while
 	 * we weren't looking
 	 */
-	if (ACCESS_ONCE(lock->tickets.head) == want) {
+	head = READ_ONCE(lock->tickets.head);
+	if (__tickets_equal(head, want)) {
 		add_stats(TAKEN_SLOW_PICKUP, 1);
 		goto out;
 	}
@@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
 
 		/* Make sure we read lock before want */
-		if (ACCESS_ONCE(w->lock) == lock &&
-		    ACCESS_ONCE(w->want) == next) {
+		if (READ_ONCE(w->lock) == lock &&
+		    READ_ONCE(w->want) == next) {
 			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
 			break;
