Skip to content

Commit 9dae0a3

Browse files
committed
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner: "A bunch of fixes for perf and kprobes: - revert a commit that caused a perf group regression - silence dmesg spam - fix kprobe probing errors on ia64 and ppc64 - filter kprobe faults from userspace - lockdep fix for perf exit path - prevent perf #GP in KVM guest - correct perf event and filters" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: kprobes: Fix "Failed to find blacklist" probing errors on ia64 and ppc64 kprobes/x86: Don't try to resolve kprobe faults from userspace perf/x86/intel: Avoid spamming kernel log for BTS buffer failure perf/x86/intel: Protect LBR and extra_regs against KVM lying perf: Fix lockdep warning on process exit perf/x86/intel/uncore: Fix SNB-EP/IVT Cbox filter mappings perf/x86/intel: Use proper dTLB-load-misses event on IvyBridge perf: Revert ("perf: Always destroy groups on exit")
2 parents 43a255c + d81b425 commit 9dae0a3

File tree

8 files changed

+130
-20
lines changed

8 files changed

+130
-20
lines changed

arch/x86/kernel/cpu/perf_event.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
118118
continue;
119119
if (event->attr.config1 & ~er->valid_mask)
120120
return -EINVAL;
121+
/* Check if the extra MSRs can be safely accessed */
122+
if (!er->extra_msr_access)
123+
return -ENXIO;
121124

122125
reg->idx = er->idx;
123126
reg->config = event->attr.config1;

arch/x86/kernel/cpu/perf_event.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -295,14 +295,16 @@ struct extra_reg {
295295
u64 config_mask;
296296
u64 valid_mask;
297297
int idx; /* per_xxx->regs[] reg index */
298+
bool extra_msr_access;
298299
};
299300

300301
#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
301-
.event = (e), \
302-
.msr = (ms), \
303-
.config_mask = (m), \
304-
.valid_mask = (vm), \
305-
.idx = EXTRA_REG_##i, \
302+
.event = (e), \
303+
.msr = (ms), \
304+
.config_mask = (m), \
305+
.valid_mask = (vm), \
306+
.idx = EXTRA_REG_##i, \
307+
.extra_msr_access = true, \
306308
}
307309

308310
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \

arch/x86/kernel/cpu/perf_event_intel.c

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2182,6 +2182,41 @@ static void intel_snb_check_microcode(void)
21822182
}
21832183
}
21842184

2185+
/*
2186+
* Under certain circumstances, accessing certain MSRs may cause a #GP.
2187+
* This function tests whether the given MSR can be safely accessed.
*
* msr:  the MSR to probe.
* mask: the bits that are XOR-flipped in the MSR's current value to
*       build the test value that is written and read back.
*
* Returns true if the MSR could be read, written with the test value and
* read back with that same value (the original value is restored before
* returning). Returns false if any of the guarded accesses fails or if
* the value read back differs from the one written.
2188+
*/
2189+
static bool check_msr(unsigned long msr, u64 mask)
2190+
{
2191+
u64 val_old, val_new, val_tmp;
2192+
2193+
/*
2194+
* Read the current value, change it and read it back to see if it
2195+
* matches. This is needed to detect certain hardware emulators
2196+
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
2197+
*/
2198+
if (rdmsrl_safe(msr, &val_old))
2199+
return false;
2200+
2201+
/*
2202+
* Only change the bits which can be updated by wrmsrl; the caller
* selects those bits through 'mask'.
2203+
*/
2204+
val_tmp = val_old ^ mask;
2205+
if (wrmsrl_safe(msr, val_tmp) ||
2206+
rdmsrl_safe(msr, &val_new))
2207+
return false;
2208+
2209+
if (val_new != val_tmp)
2210+
return false;
2211+
2212+
/*
* At this point we know that the MSR can be safely accessed.
2213+
* Restore the old value and return.
2214+
*/
2215+
wrmsrl(msr, val_old);
2216+
2217+
return true;
2218+
}
2219+
21852220
static __init void intel_sandybridge_quirk(void)
21862221
{
21872222
x86_pmu.check_microcode = intel_snb_check_microcode;
@@ -2271,7 +2306,8 @@ __init int intel_pmu_init(void)
22712306
union cpuid10_ebx ebx;
22722307
struct event_constraint *c;
22732308
unsigned int unused;
2274-
int version;
2309+
struct extra_reg *er;
2310+
int version, i;
22752311

22762312
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
22772313
switch (boot_cpu_data.x86) {
@@ -2474,6 +2510,9 @@ __init int intel_pmu_init(void)
24742510
case 62: /* IvyBridge EP */
24752511
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
24762512
sizeof(hw_cache_event_ids));
2513+
/* dTLB-load-misses on IVB is different than SNB */
2514+
hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
2515+
24772516
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
24782517
sizeof(hw_cache_extra_regs));
24792518

@@ -2574,6 +2613,34 @@ __init int intel_pmu_init(void)
25742613
}
25752614
}
25762615

2616+
/*
2617+
* Accessing LBR MSRs may cause a #GP under certain circumstances.
2618+
* E.g. KVM doesn't support the LBR MSRs.
2619+
* Check all LBR MSRs here.
2620+
* Disable LBR access if any LBR MSR cannot be accessed.
2621+
*/
2622+
if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
2623+
x86_pmu.lbr_nr = 0;
2624+
for (i = 0; i < x86_pmu.lbr_nr; i++) {
2625+
if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
2626+
check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
2627+
x86_pmu.lbr_nr = 0;
2628+
}
2629+
2630+
/*
2631+
* Accessing extra MSRs may cause a #GP under certain circumstances.
2632+
* E.g. KVM doesn't support offcore events.
2633+
* Check all extra_regs here.
2634+
*/
2635+
if (x86_pmu.extra_regs) {
2636+
for (er = x86_pmu.extra_regs; er->msr; er++) {
2637+
er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
2638+
/* Disable LBR select mapping */
2639+
if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
2640+
x86_pmu.lbr_sel_map = NULL;
2641+
}
2642+
}
2643+
25772644
/* Support full width counters using alternative MSR range */
25782645
if (x86_pmu.intel_cap.full_width_write) {
25792646
x86_pmu.max_period = x86_pmu.cntval_mask;

arch/x86/kernel/cpu/perf_event_intel_ds.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu)
311311
if (!x86_pmu.bts)
312312
return 0;
313313

314-
buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
315-
if (unlikely(!buffer))
314+
buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
315+
if (unlikely(!buffer)) {
316+
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
316317
return -ENOMEM;
318+
}
317319

318320
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
319321
thresh = max / 16;

arch/x86/kernel/cpu/perf_event_intel_uncore.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
550550
SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
551551
SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
552552
SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
553-
SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc),
554-
SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc),
553+
SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
554+
SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
555555
SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
556556
SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
557557
SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
558558
SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
559559
SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
560560
SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
561-
SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc),
562-
SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc),
561+
SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
562+
SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
563563
SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
564564
SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
565565
SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
@@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
12221222
SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
12231223
SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
12241224
SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
1225+
12251226
SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
12261227
SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
12271228
SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
@@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
12451246
SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
12461247
SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
12471248
SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
1248-
SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
1249+
SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
12491250
SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
12501251
SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
12511252
SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),

arch/x86/kernel/kprobes/core.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
574574
struct kprobe *p;
575575
struct kprobe_ctlblk *kcb;
576576

577+
if (user_mode_vm(regs))
578+
return 0;
579+
577580
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
578581
/*
579582
* We don't want to be preempted for the entire

kernel/events/core.c

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7458,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event,
74587458
struct perf_event_context *child_ctx,
74597459
struct task_struct *child)
74607460
{
7461-
perf_remove_from_context(child_event, true);
7461+
/*
7462+
* Do not destroy the 'original' grouping; because of the context
7463+
* switch optimization the original events could've ended up in a
7464+
* random child task.
7465+
*
7466+
* If we were to destroy the original group, all group related
7467+
* operations would cease to function properly after this random
7468+
* child dies.
7469+
*
7470+
* Do destroy all inherited groups, we don't care about those
7471+
* and being thorough is better.
7472+
*/
7473+
perf_remove_from_context(child_event, !!child_event->parent);
74627474

74637475
/*
74647476
* It can happen that the parent exits first, and has events
@@ -7474,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event,
74747486
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
74757487
{
74767488
struct perf_event *child_event, *next;
7477-
struct perf_event_context *child_ctx;
7489+
struct perf_event_context *child_ctx, *parent_ctx;
74787490
unsigned long flags;
74797491

74807492
if (likely(!child->perf_event_ctxp[ctxn])) {
@@ -7499,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
74997511
raw_spin_lock(&child_ctx->lock);
75007512
task_ctx_sched_out(child_ctx);
75017513
child->perf_event_ctxp[ctxn] = NULL;
7514+
7515+
/*
7516+
* In order to avoid freeing: child_ctx->parent_ctx->task
7517+
* under perf_event_context::lock, grab another reference.
7518+
*/
7519+
parent_ctx = child_ctx->parent_ctx;
7520+
if (parent_ctx)
7521+
get_ctx(parent_ctx);
7522+
75027523
/*
75037524
* If this context is a clone; unclone it so it can't get
75047525
* swapped to another process while we're removing all
@@ -7508,6 +7529,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
75087529
update_context_time(child_ctx);
75097530
raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
75107531

7532+
/*
7533+
* Now that we no longer hold perf_event_context::lock, drop
7534+
* our extra child_ctx->parent_ctx reference.
7535+
*/
7536+
if (parent_ctx)
7537+
put_ctx(parent_ctx);
7538+
75117539
/*
75127540
* Report the task dead after unscheduling the events so that we
75137541
* won't get any samples after PERF_RECORD_EXIT. We can however still

kernel/kprobes.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,19 +2037,23 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
20372037
{
20382038
unsigned long *iter;
20392039
struct kprobe_blacklist_entry *ent;
2040-
unsigned long offset = 0, size = 0;
2040+
unsigned long entry, offset = 0, size = 0;
20412041

20422042
for (iter = start; iter < end; iter++) {
2043-
if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
2044-
pr_err("Failed to find blacklist %p\n", (void *)*iter);
2043+
entry = arch_deref_entry_point((void *)*iter);
2044+
2045+
if (!kernel_text_address(entry) ||
2046+
!kallsyms_lookup_size_offset(entry, &size, &offset)) {
2047+
pr_err("Failed to find blacklist at %p\n",
2048+
(void *)entry);
20452049
continue;
20462050
}
20472051

20482052
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
20492053
if (!ent)
20502054
return -ENOMEM;
2051-
ent->start_addr = *iter;
2052-
ent->end_addr = *iter + size;
2055+
ent->start_addr = entry;
2056+
ent->end_addr = entry + size;
20532057
INIT_LIST_HEAD(&ent->list);
20542058
list_add_tail(&ent->list, &kprobe_blacklist);
20552059
}

0 commit comments

Comments
 (0)