Commit ade0899

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "This tree includes some late late perf items that missed the first
  round:

  tools:

   - Bash auto completion improvements, now we can auto complete the
     tools' long options, tracepoint event names, etc, from Namhyung Kim.

   - Look up thread using tid instead of pid in 'perf sched'.

   - Move global variables into a perf_kvm struct, from David Ahern.

   - Hists refactorings, preparatory for improved 'diff' command, from
     Jiri Olsa.

   - Hists refactorings, preparatory for event group viewing work, from
     Namhyung Kim.

   - Remove double negation on optional feature macro definitions, from
     Namhyung Kim.

   - Remove several cases of needless global variables, on most builtins.

   - misc fixes

  kernel:

   - sysfs support for IBS on AMD CPUs, from Robert Richter.

   - Support for an upcoming Intel CPU, the Xeon-Phi / Knights Corner
     HPC blade PMU, from Vince Weaver.

   - misc fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  perf: Fix perf_cgroup_switch for sw-events
  perf: Clarify perf_cpu_context::active_pmu usage by renaming it to ::unique_pmu
  perf/AMD/IBS: Add sysfs support
  perf hists: Add more helpers for hist entry stat
  perf hists: Move he->stat.nr_events initialization to a template
  perf hists: Introduce struct he_stat
  perf diff: Removing the total_period argument from output code
  perf tool: Add hpp interface to enable/disable hpp column
  perf tools: Removing hists pair argument from output path
  perf hists: Separate overhead and baseline columns
  perf diff: Refactor diff displacement possition info
  perf hists: Add struct hists pointer to struct hist_entry
  perf tools: Complete tracepoint event names
  perf/x86: Add support for Intel Xeon-Phi Knights Corner PMU
  perf evlist: Remove some unused methods
  perf evlist: Introduce add_newtp method
  perf kvm: Move global variables into a perf_kvm struct
  perf tools: Convert to BACKTRACE_SUPPORT
  perf tools: Long option completion support for each subcommands
  perf tools: Complete long option names of perf command
  ...
2 parents 871a059 + 95cf59e


55 files changed: +1517, -1198 lines

arch/x86/include/asm/msr-index.h

Lines changed: 5 additions & 0 deletions
@@ -121,6 +121,11 @@
 #define MSR_P6_EVNTSEL0			0x00000186
 #define MSR_P6_EVNTSEL1			0x00000187
 
+#define MSR_KNC_PERFCTR0		0x00000020
+#define MSR_KNC_PERFCTR1		0x00000021
+#define MSR_KNC_EVNTSEL0		0x00000028
+#define MSR_KNC_EVNTSEL1		0x00000029
+
 /* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */

arch/x86/kernel/cpu/Makefile

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif

arch/x86/kernel/cpu/perf_event.h

Lines changed: 2 additions & 0 deletions
@@ -626,6 +626,8 @@ int p4_pmu_init(void);
 
 int p6_pmu_init(void);
 
+int knc_pmu_init(void);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)

arch/x86/kernel/cpu/perf_event_amd_ibs.c

Lines changed: 49 additions & 12 deletions
@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
 };
 
 struct perf_ibs {
-	struct pmu		pmu;
-	unsigned int		msr;
-	u64			config_mask;
-	u64			cnt_mask;
-	u64			enable_mask;
-	u64			valid_mask;
-	u64			max_period;
-	unsigned long		offset_mask[1];
-	int			offset_max;
-	struct cpu_perf_ibs __percpu *pcpu;
-	u64			(*get_count)(u64 config);
+	struct pmu			pmu;
+	unsigned int			msr;
+	u64				config_mask;
+	u64				cnt_mask;
+	u64				enable_mask;
+	u64				valid_mask;
+	u64				max_period;
+	unsigned long			offset_mask[1];
+	int				offset_max;
+	struct cpu_perf_ibs __percpu	*pcpu;
+
+	struct attribute		**format_attrs;
+	struct attribute_group		format_group;
+	const struct attribute_group	*attr_groups[2];
+
+	u64				(*get_count)(u64 config);
 };
 
 struct perf_ibs_data {
@@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)
 
 static void perf_ibs_read(struct perf_event *event) { }
 
+PMU_FORMAT_ATTR(rand_en,	"config:57");
+PMU_FORMAT_ATTR(cnt_ctl,	"config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+	&format_attr_rand_en.attr,
+	NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+	NULL,	/* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+	NULL,
+};
+
 static struct perf_ibs perf_ibs_fetch = {
 	.pmu = {
 		.task_ctx_nr	= perf_invalid_context,
@@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
 	.max_period		= IBS_FETCH_MAX_CNT << 4,
 	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
 	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
+	.format_attrs		= ibs_fetch_format_attrs,
 
 	.get_count		= get_ibs_fetch_count,
 };
@@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
 	.max_period		= IBS_OP_MAX_CNT << 4,
 	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
 	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
+	.format_attrs		= ibs_op_format_attrs,
 
 	.get_count		= get_ibs_op_count,
 };
@@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
 	perf_ibs->pcpu = pcpu;
 
+	/* register attributes */
+	if (perf_ibs->format_attrs[0]) {
+		memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+		perf_ibs->format_group.name	= "format";
+		perf_ibs->format_group.attrs	= perf_ibs->format_attrs;
+
+		memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+		perf_ibs->attr_groups[0]	= &perf_ibs->format_group;
+		perf_ibs->pmu.attr_groups	= perf_ibs->attr_groups;
+	}
+
 	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
 	if (ret) {
 		perf_ibs->pcpu = NULL;
@@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
 static __init int perf_event_ibs_init(void)
 {
+	struct attribute **attr = ibs_op_format_attrs;
+
 	if (!ibs_caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
 	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-	if (ibs_caps & IBS_CAPS_OPCNT)
+
+	if (ibs_caps & IBS_CAPS_OPCNT) {
 		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+		*attr++ = &format_attr_cnt_ctl.attr;
+	}
 	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
 	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
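Note: PMU_FORMAT_ATTR() used above is the stock helper from include/linux/perf_event.h; once perf_pmu_register() runs, the generated attributes surface as files such as /sys/bus/event_source/devices/ibs_fetch/format/rand_en and .../ibs_op/format/cnt_ctl. As a paraphrased sketch (not part of this diff), the macro expands to roughly:

#define PMU_FORMAT_ATTR(_name, _format)					\
static ssize_t								\
_name##_show(struct device *dev,					\
	     struct device_attribute *attr,				\
	     char *page)						\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)

That is why userspace (e.g. the perf tool) can discover the bit layout of the config word without hard-coding it per CPU.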

arch/x86/kernel/cpu/perf_event_intel.c

Lines changed: 2 additions & 0 deletions
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
 	switch (boot_cpu_data.x86) {
 	case 0x6:
 		return p6_pmu_init();
+	case 0xb:
+		return knc_pmu_init();
 	case 0xf:
 		return p4_pmu_init();
 	}
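Note: Knights Corner identifies itself as x86 family 0xb in CPUID leaf 1, which is the value boot_cpu_data.x86 carries into the switch above. An illustrative userspace check (not part of the commit) of the same field:

/* Illustrative only: print the base x86 family, 0xb on KNC. */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	/* Base family lives in bits 11:8 of EAX for leaf 1. */
	printf("x86 family: 0x%x\n", (eax >> 8) & 0xf);
	return 0;
}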

arch/x86/kernel/cpu/perf_event_knc.c

Lines changed: 248 additions & 0 deletions
@@ -0,0 +1,248 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x002a,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x0016,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0028,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0029,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0012,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x002b,
+};
+
+static __initconst u64 knc_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		/* On Xeon Phi event "0" is a valid DATA_READ          */
+		/*   (L1 Data Cache Reads) Instruction.                */
+		/* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
+		/* bit will always be set in x86_pmu_hw_config().      */
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ           */
+		[ C(RESULT_MISS)   ] = 0x0003,	/* DATA_READ_MISS      */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE          */
+		[ C(RESULT_MISS)   ] = 0x0004,	/* DATA_WRITE_MISS     */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0011,	/* L1_DATA_PF1         */
+		[ C(RESULT_MISS)   ] = 0x001c,	/* L1_DATA_PF1_MISS    */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ           */
+		[ C(RESULT_MISS)   ] = 0x000e,	/* CODE_CACHE_MISS     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x10cb,	/* L2_READ_MISS        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10cc,	/* L2_WRITE_HIT        */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10fc,	/* L2_DATA_PF2         */
+		[ C(RESULT_MISS)   ] = 0x10fe,	/* L2_DATA_PF2_MISS    */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ */
+						/* see note on L1 OP_READ */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK      */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE          */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK      */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ           */
+		[ C(RESULT_MISS)   ] = 0x000d,	/* CODE_PAGE_WALK      */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0012,	/* BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x002b,	/* BRANCHES_MISPREDICTED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+	return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xc3, 0x1),	/* HWP_L2HIT */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0x1),	/* HWP_L2MISS */
+	INTEL_EVENT_CONSTRAINT(0xc8, 0x1),	/* L2_READ_HIT_E */
+	INTEL_EVENT_CONSTRAINT(0xc9, 0x1),	/* L2_READ_HIT_M */
+	INTEL_EVENT_CONSTRAINT(0xca, 0x1),	/* L2_READ_HIT_S */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),	/* L2_READ_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcc, 0x1),	/* L2_WRITE_HIT */
+	INTEL_EVENT_CONSTRAINT(0xce, 0x1),	/* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcf, 0x1),	/* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+	INTEL_EVENT_CONSTRAINT(0xd7, 0x1),	/* L2_VICTIM_REQ_WITH_DATA */
+	INTEL_EVENT_CONSTRAINT(0xe3, 0x1),	/* SNP_HITM_BUNIT */
+	INTEL_EVENT_CONSTRAINT(0xe6, 0x1),	/* SNP_HIT_L2 */
+	INTEL_EVENT_CONSTRAINT(0xe7, 0x1),	/* SNP_HITM_L2 */
+	INTEL_EVENT_CONSTRAINT(0xf1, 0x1),	/* L2_DATA_READ_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf2, 0x1),	/* L2_DATA_WRITE_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf6, 0x1),	/* L2_DATA_READ_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf7, 0x1),	/* L2_DATA_WRITE_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xfc, 0x1),	/* L2_DATA_PF2 */
+	INTEL_EVENT_CONSTRAINT(0xfd, 0x1),	/* L2_DATA_PF2_DROP */
+	INTEL_EVENT_CONSTRAINT(0xfe, 0x1),	/* L2_DATA_PF2_MISS */
+	INTEL_EVENT_CONSTRAINT(0xff, 0x1),	/* L2_DATA_HIT_INFLIGHT_PF2 */
+	EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f
+
+#define KNC_ENABLE_COUNTER0			0x00000001
+#define KNC_ENABLE_COUNTER1			0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+PMU_FORMAT_ATTR(event,	"config:0-7"	);
+PMU_FORMAT_ATTR(umask,	"config:8-15"	);
+PMU_FORMAT_ATTR(edge,	"config:18"	);
+PMU_FORMAT_ATTR(inv,	"config:23"	);
+PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+
+static struct attribute *intel_knc_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask.attr,
+	NULL,
+};
+
+static __initconst struct x86_pmu knc_pmu = {
+	.name			= "knc",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= knc_pmu_disable_all,
+	.enable_all		= knc_pmu_enable_all,
+	.enable			= knc_pmu_enable_event,
+	.disable		= knc_pmu_disable_event,
+	.hw_config		= x86_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_KNC_EVNTSEL0,
+	.perfctr		= MSR_KNC_PERFCTR0,
+	.event_map		= knc_pmu_event_map,
+	.max_events		= ARRAY_SIZE(knc_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_counters		= 2,
+	/* in theory 40 bits, early silicon is buggy though */
+	.cntval_bits		= 32,
+	.cntval_mask		= (1ULL << 32) - 1,
+	.get_event_constraints	= x86_get_event_constraints,
+	.event_constraints	= knc_event_constraints,
+	.format_attrs		= intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+	x86_pmu = knc_pmu;
+
+	memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
+		sizeof(hw_cache_event_ids));
+
+	return 0;
+}
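Note: with this driver in place, the generic perf_event ABI works unchanged on KNC. As a minimal illustrative sketch (not from the commit), a userspace request for PERF_COUNT_HW_CPU_CYCLES is translated by knc_pmu_event_map() above into KNC event 0x002a:

/* Count CPU cycles for the current task via perf_event_open(2). */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;

	/* Measure the calling task on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}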
