Skip to content

Commit 66e1c94

Browse files
committed
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/pti updates from Thomas Gleixner: "A mixed bag of fixes and updates for the ghosts which are hunting us. The scheduler fixes have been pulled into that branch to avoid conflicts. - A set of fixes to address a kthread_parkme() race which caused lost wakeups and loss of state. - A deadlock fix for stop_machine() solved by moving the wakeups outside of the stopper_lock held region. - A set of Spectre V1 array access restrictions. The possible problematic spots were discovered by Dan Carpenter's new checks in smatch. - Removal of an unused file which was forgotten when the rest of that functionality was removed" * 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/vdso: Remove unused file perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map() perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_* perf/core: Fix possible Spectre-v1 indexing for ->aux_pages[] sched/autogroup: Fix possible Spectre-v1 indexing for sched_prio_to_weight[] sched/core: Fix possible Spectre-v1 indexing for sched_prio_to_weight[] sched/core: Introduce set_special_state() kthread, sched/wait: Fix kthread_parkme() completion issue kthread, sched/wait: Fix kthread_parkme() wait-loop sched/fair: Fix the update of blocked load when newly idle stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
2 parents 86a4ac4 + e0f6d1a commit 66e1c94

File tree

14 files changed

+153
-78
lines changed

14 files changed

+153
-78
lines changed

arch/x86/entry/vdso/vdso32/vdso-fakesections.c

Lines changed: 0 additions & 1 deletion
This file was deleted.

arch/x86/events/core.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <linux/cpu.h>
2828
#include <linux/bitops.h>
2929
#include <linux/device.h>
30+
#include <linux/nospec.h>
3031

3132
#include <asm/apic.h>
3233
#include <asm/stacktrace.h>
@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
304305

305306
config = attr->config;
306307

307-
cache_type = (config >> 0) & 0xff;
308+
cache_type = (config >> 0) & 0xff;
308309
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
309310
return -EINVAL;
311+
cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
310312

311313
cache_op = (config >> 8) & 0xff;
312314
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
313315
return -EINVAL;
316+
cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
314317

315318
cache_result = (config >> 16) & 0xff;
316319
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
317320
return -EINVAL;
321+
cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
318322

319323
val = hw_cache_event_ids[cache_type][cache_op][cache_result];
320324

@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
421425
if (attr->config >= x86_pmu.max_events)
422426
return -EINVAL;
423427

428+
attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
429+
424430
/*
425431
* The generic map:
426432
*/

arch/x86/events/intel/cstate.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
#include <linux/module.h>
9393
#include <linux/slab.h>
9494
#include <linux/perf_event.h>
95+
#include <linux/nospec.h>
9596
#include <asm/cpu_device_id.h>
9697
#include <asm/intel-family.h>
9798
#include "../perf_event.h"
@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
302303
} else if (event->pmu == &cstate_pkg_pmu) {
303304
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
304305
return -EINVAL;
306+
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
305307
if (!pkg_msr[cfg].attr)
306308
return -EINVAL;
307309
event->hw.event_base = pkg_msr[cfg].msr;

arch/x86/events/msr.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// SPDX-License-Identifier: GPL-2.0
22
#include <linux/perf_event.h>
3+
#include <linux/nospec.h>
34
#include <asm/intel-family.h>
45

56
enum perf_msr_id {
@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
158159
if (event->attr.type != event->pmu->type)
159160
return -ENOENT;
160161

161-
if (cfg >= PERF_MSR_EVENT_MAX)
162-
return -EINVAL;
163-
164162
/* unsupported modes and filters */
165163
if (event->attr.exclude_user ||
166164
event->attr.exclude_kernel ||
@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
171169
event->attr.sample_period) /* no sampling */
172170
return -EINVAL;
173171

172+
if (cfg >= PERF_MSR_EVENT_MAX)
173+
return -EINVAL;
174+
175+
cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
176+
174177
if (!msr[cfg].attr)
175178
return -EINVAL;
176179

include/linux/kthread.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
6262
int kthread_park(struct task_struct *k);
6363
void kthread_unpark(struct task_struct *k);
6464
void kthread_parkme(void);
65+
void kthread_park_complete(struct task_struct *k);
6566

6667
int kthreadd(void *unused);
6768
extern struct task_struct *kthreadd_task;

include/linux/sched.h

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,17 +112,36 @@ struct task_group;
112112

113113
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
114114

115+
/*
116+
* Special states are those that do not use the normal wait-loop pattern. See
117+
* the comment with set_special_state().
118+
*/
119+
#define is_special_task_state(state) \
120+
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
121+
115122
#define __set_current_state(state_value) \
116123
do { \
124+
WARN_ON_ONCE(is_special_task_state(state_value));\
117125
current->task_state_change = _THIS_IP_; \
118126
current->state = (state_value); \
119127
} while (0)
128+
120129
#define set_current_state(state_value) \
121130
do { \
131+
WARN_ON_ONCE(is_special_task_state(state_value));\
122132
current->task_state_change = _THIS_IP_; \
123133
smp_store_mb(current->state, (state_value)); \
124134
} while (0)
125135

136+
#define set_special_state(state_value) \
137+
do { \
138+
unsigned long flags; /* may shadow */ \
139+
WARN_ON_ONCE(!is_special_task_state(state_value)); \
140+
raw_spin_lock_irqsave(&current->pi_lock, flags); \
141+
current->task_state_change = _THIS_IP_; \
142+
current->state = (state_value); \
143+
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
144+
} while (0)
126145
#else
127146
/*
128147
* set_current_state() includes a barrier so that the write of current->state
@@ -144,8 +163,8 @@ struct task_group;
144163
*
145164
* The above is typically ordered against the wakeup, which does:
146165
*
147-
* need_sleep = false;
148-
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
166+
* need_sleep = false;
167+
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
149168
*
150169
* Where wake_up_state() (and all other wakeup primitives) imply enough
151170
* barriers to order the store of the variable against wakeup.
@@ -154,12 +173,33 @@ struct task_group;
154173
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
155174
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
156175
*
157-
* This is obviously fine, since they both store the exact same value.
176+
* However, with slightly different timing the wakeup TASK_RUNNING store can
177+
* also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
178+
* a problem either because that will result in one extra go around the loop
179+
* and our @cond test will save the day.
158180
*
159181
* Also see the comments of try_to_wake_up().
160182
*/
161-
#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
162-
#define set_current_state(state_value) smp_store_mb(current->state, (state_value))
183+
#define __set_current_state(state_value) \
184+
current->state = (state_value)
185+
186+
#define set_current_state(state_value) \
187+
smp_store_mb(current->state, (state_value))
188+
189+
/*
190+
* set_special_state() should be used for those states when the blocking task
191+
* can not use the regular condition based wait-loop. In that case we must
192+
* serialize against wakeups such that any possible in-flight TASK_RUNNING stores
193+
* will not collide with our state change.
194+
*/
195+
#define set_special_state(state_value) \
196+
do { \
197+
unsigned long flags; /* may shadow */ \
198+
raw_spin_lock_irqsave(&current->pi_lock, flags); \
199+
current->state = (state_value); \
200+
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
201+
} while (0)
202+
163203
#endif
164204

165205
/* Task command name length: */

include/linux/sched/signal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
280280
{
281281
spin_lock_irq(&current->sighand->siglock);
282282
if (current->jobctl & JOBCTL_STOP_DEQUEUED)
283-
__set_current_state(TASK_STOPPED);
283+
set_special_state(TASK_STOPPED);
284284
spin_unlock_irq(&current->sighand->siglock);
285285

286286
schedule();

kernel/events/ring_buffer.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <linux/slab.h>
1515
#include <linux/circ_buf.h>
1616
#include <linux/poll.h>
17+
#include <linux/nospec.h>
1718

1819
#include "internal.h"
1920

@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
867868
return NULL;
868869

869870
/* AUX space */
870-
if (pgoff >= rb->aux_pgoff)
871-
return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
871+
if (pgoff >= rb->aux_pgoff) {
872+
int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
873+
return virt_to_page(rb->aux_pages[aux_pgoff]);
874+
}
872875
}
873876

874877
return __perf_mmap_to_page(rb, pgoff);

kernel/kthread.c

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
5555
KTHREAD_IS_PER_CPU = 0,
5656
KTHREAD_SHOULD_STOP,
5757
KTHREAD_SHOULD_PARK,
58-
KTHREAD_IS_PARKED,
5958
};
6059

6160
static inline void set_kthread_struct(void *kthread)
@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
177176

178177
static void __kthread_parkme(struct kthread *self)
179178
{
180-
__set_current_state(TASK_PARKED);
181-
while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
182-
if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
183-
complete(&self->parked);
179+
for (;;) {
180+
set_current_state(TASK_PARKED);
181+
if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
182+
break;
184183
schedule();
185-
__set_current_state(TASK_PARKED);
186184
}
187-
clear_bit(KTHREAD_IS_PARKED, &self->flags);
188185
__set_current_state(TASK_RUNNING);
189186
}
190187

@@ -194,6 +191,11 @@ void kthread_parkme(void)
194191
}
195192
EXPORT_SYMBOL_GPL(kthread_parkme);
196193

194+
void kthread_park_complete(struct task_struct *k)
195+
{
196+
complete(&to_kthread(k)->parked);
197+
}
198+
197199
static int kthread(void *_create)
198200
{
199201
/* Copy data: it's on kthread's stack */
@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
450452
{
451453
struct kthread *kthread = to_kthread(k);
452454

453-
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
454455
/*
455-
* We clear the IS_PARKED bit here as we don't wait
456-
* until the task has left the park code. So if we'd
457-
* park before that happens we'd see the IS_PARKED bit
458-
* which might be about to be cleared.
456+
* Newly created kthread was parked when the CPU was offline.
457+
* The binding was lost and we need to set it again.
459458
*/
460-
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
461-
/*
462-
* Newly created kthread was parked when the CPU was offline.
463-
* The binding was lost and we need to set it again.
464-
*/
465-
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
466-
__kthread_bind(k, kthread->cpu, TASK_PARKED);
467-
wake_up_state(k, TASK_PARKED);
468-
}
459+
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
460+
__kthread_bind(k, kthread->cpu, TASK_PARKED);
461+
462+
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
463+
wake_up_state(k, TASK_PARKED);
469464
}
470465
EXPORT_SYMBOL_GPL(kthread_unpark);
471466

@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
488483
if (WARN_ON(k->flags & PF_EXITING))
489484
return -ENOSYS;
490485

491-
if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
492-
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
493-
if (k != current) {
494-
wake_up_process(k);
495-
wait_for_completion(&kthread->parked);
496-
}
486+
if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
487+
return -EBUSY;
488+
489+
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
490+
if (k != current) {
491+
wake_up_process(k);
492+
wait_for_completion(&kthread->parked);
497493
}
498494

499495
return 0;

kernel/sched/autogroup.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/*
33
* Auto-group scheduling implementation:
44
*/
5+
#include <linux/nospec.h>
56
#include "sched.h"
67

78
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
209210
static unsigned long next = INITIAL_JIFFIES;
210211
struct autogroup *ag;
211212
unsigned long shares;
212-
int err;
213+
int err, idx;
213214

214215
if (nice < MIN_NICE || nice > MAX_NICE)
215216
return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
227228

228229
next = HZ / 10 + jiffies;
229230
ag = autogroup_task_get(p);
230-
shares = scale_load(sched_prio_to_weight[nice + 20]);
231+
232+
idx = array_index_nospec(nice + 20, 40);
233+
shares = scale_load(sched_prio_to_weight[idx]);
231234

232235
down_write(&ag->lock);
233236
err = sched_group_set_shares(ag->tg, shares);

0 commit comments

Comments
 (0)