Skip to content

Commit 9a92e16

Browse files
Andi Kleen authored and Ingo Molnar committed
perf/x86/intel: Add Intel Skylake PMU support
Add perf core PMU support for future Intel Skylake CPU cores. The code is based on Haswell/Broadwell. There is a new cache event list, based on the updated Haswell event list. Skylake has removed most counter constraints on basic events, so the basic constraints table now only has a single entry (plus the fixed counters). TSX support and various other setups are all shared with Haswell. Skylake has 32 LBR entries. Add a new LBR init function to set this up. The filters are all the same as Haswell. It also has a new LBR format with a separate LBR_INFO_* MSR, but that has been already added earlier. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: eranian@google.com Link: http://lkml.kernel.org/r/1431285767-27027-7-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 425507f commit 9a92e16

File tree

4 files changed

+279
-1
lines changed

4 files changed

+279
-1
lines changed

arch/x86/kernel/cpu/perf_event.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ struct intel_excl_cntrs {
165165
unsigned core_id; /* per-core: core id */
166166
};
167167

168-
#define MAX_LBR_ENTRIES 16
168+
#define MAX_LBR_ENTRIES 32
169169

170170
enum {
171171
X86_PERF_KFREE_SHARED = 0,
@@ -861,6 +861,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
861861

862862
extern struct event_constraint intel_hsw_pebs_event_constraints[];
863863

864+
extern struct event_constraint intel_skl_pebs_event_constraints[];
865+
864866
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
865867

866868
void intel_pmu_pebs_enable(struct perf_event *event);
@@ -899,6 +901,8 @@ void intel_pmu_lbr_init_snb(void);
899901

900902
void intel_pmu_lbr_init_hsw(void);
901903

904+
void intel_pmu_lbr_init_skl(void);
905+
902906
int intel_pmu_setup_lbr_filter(struct perf_event *event);
903907

904908
void intel_pt_interrupt(void);

arch/x86/kernel/cpu/perf_event_intel.c

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
177177
EVENT_CONSTRAINT_END
178178
};
179179

180+
/*
 * Skylake removed most counter constraints on basic events, so the
 * table only carries the fixed counters plus PREC_DIST, which must
 * run on counter 1.
 */
struct event_constraint intel_skl_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
	EVENT_CONSTRAINT_END
};
180188
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
181189
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
182190
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
@@ -193,6 +201,13 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
193201
EVENT_EXTRA_END
194202
};
195203

204+
/*
 * Skylake OFFCORE_RESPONSE extra regs: two response MSRs plus the
 * PEBS load-latency event.  Valid request/response bit mask is wider
 * than on Haswell (0x3fffff8fff).
 */
static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	EVENT_EXTRA_END
};
196211
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
197212
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
198213
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -244,6 +259,200 @@ static u64 intel_pmu_event_map(int hw_event)
244259
return intel_perfmon_event_map[hw_event];
245260
}
246261

262+
/*
 * Notes on the events:
 * - data reads do not include code reads (comparable to earlier tables)
 * - data counts include speculative execution (except L1 write, dtlb, bpu)
 * - remote node access includes remote memory, remote cache, remote mmio.
 * - prefetches are not included in the counts.
 * - icache miss does not include decoded icache
 */

/* Skylake OFFCORE_RESPONSE MSR bit definitions. */
#define SKL_DEMAND_DATA_RD		BIT_ULL(0)
#define SKL_DEMAND_RFO			BIT_ULL(1)
#define SKL_ANY_RESPONSE		BIT_ULL(16)
#define SKL_SUPPLIER_NONE		BIT_ULL(17)
#define SKL_L3_MISS_LOCAL_DRAM		BIT_ULL(26)
#define SKL_L3_MISS_REMOTE_HOP0_DRAM	BIT_ULL(27)
#define SKL_L3_MISS_REMOTE_HOP1_DRAM	BIT_ULL(28)
#define SKL_L3_MISS_REMOTE_HOP2P_DRAM	BIT_ULL(29)
#define SKL_L3_MISS			(SKL_L3_MISS_LOCAL_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP0_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
#define SKL_SPL_HIT			BIT_ULL(30)
#define SKL_SNOOP_NONE			BIT_ULL(31)
#define SKL_SNOOP_NOT_NEEDED		BIT_ULL(32)
#define SKL_SNOOP_MISS			BIT_ULL(33)
#define SKL_SNOOP_HIT_NO_FWD		BIT_ULL(34)
#define SKL_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
#define SKL_SNOOP_HITM			BIT_ULL(36)
#define SKL_SNOOP_NON_DRAM		BIT_ULL(37)
#define SKL_ANY_SNOOP			(SKL_SPL_HIT|SKL_SNOOP_NONE| \
					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
					 SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
#define SKL_DEMAND_READ			SKL_DEMAND_DATA_RD
/* SNOOP_DRAM: every snoop outcome except SNOOP_NON_DRAM */
#define SKL_SNOOP_DRAM			(SKL_SNOOP_NONE| \
					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
					 SKL_SNOOP_HITM|SKL_SPL_HIT)
#define SKL_DEMAND_WRITE		SKL_DEMAND_RFO
#define SKL_LLC_ACCESS			SKL_ANY_RESPONSE
#define SKL_L3_MISS_REMOTE		(SKL_L3_MISS_REMOTE_HOP0_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
305+
306+
static __initconst const u64 skl_hw_cache_event_ids
307+
[PERF_COUNT_HW_CACHE_MAX]
308+
[PERF_COUNT_HW_CACHE_OP_MAX]
309+
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
310+
{
311+
[ C(L1D ) ] = {
312+
[ C(OP_READ) ] = {
313+
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
314+
[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
315+
},
316+
[ C(OP_WRITE) ] = {
317+
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
318+
[ C(RESULT_MISS) ] = 0x0,
319+
},
320+
[ C(OP_PREFETCH) ] = {
321+
[ C(RESULT_ACCESS) ] = 0x0,
322+
[ C(RESULT_MISS) ] = 0x0,
323+
},
324+
},
325+
[ C(L1I ) ] = {
326+
[ C(OP_READ) ] = {
327+
[ C(RESULT_ACCESS) ] = 0x0,
328+
[ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
329+
},
330+
[ C(OP_WRITE) ] = {
331+
[ C(RESULT_ACCESS) ] = -1,
332+
[ C(RESULT_MISS) ] = -1,
333+
},
334+
[ C(OP_PREFETCH) ] = {
335+
[ C(RESULT_ACCESS) ] = 0x0,
336+
[ C(RESULT_MISS) ] = 0x0,
337+
},
338+
},
339+
[ C(LL ) ] = {
340+
[ C(OP_READ) ] = {
341+
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
342+
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
343+
},
344+
[ C(OP_WRITE) ] = {
345+
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
346+
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
347+
},
348+
[ C(OP_PREFETCH) ] = {
349+
[ C(RESULT_ACCESS) ] = 0x0,
350+
[ C(RESULT_MISS) ] = 0x0,
351+
},
352+
},
353+
[ C(DTLB) ] = {
354+
[ C(OP_READ) ] = {
355+
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
356+
[ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
357+
},
358+
[ C(OP_WRITE) ] = {
359+
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
360+
[ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
361+
},
362+
[ C(OP_PREFETCH) ] = {
363+
[ C(RESULT_ACCESS) ] = 0x0,
364+
[ C(RESULT_MISS) ] = 0x0,
365+
},
366+
},
367+
[ C(ITLB) ] = {
368+
[ C(OP_READ) ] = {
369+
[ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
370+
[ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
371+
},
372+
[ C(OP_WRITE) ] = {
373+
[ C(RESULT_ACCESS) ] = -1,
374+
[ C(RESULT_MISS) ] = -1,
375+
},
376+
[ C(OP_PREFETCH) ] = {
377+
[ C(RESULT_ACCESS) ] = -1,
378+
[ C(RESULT_MISS) ] = -1,
379+
},
380+
},
381+
[ C(BPU ) ] = {
382+
[ C(OP_READ) ] = {
383+
[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
384+
[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
385+
},
386+
[ C(OP_WRITE) ] = {
387+
[ C(RESULT_ACCESS) ] = -1,
388+
[ C(RESULT_MISS) ] = -1,
389+
},
390+
[ C(OP_PREFETCH) ] = {
391+
[ C(RESULT_ACCESS) ] = -1,
392+
[ C(RESULT_MISS) ] = -1,
393+
},
394+
},
395+
[ C(NODE) ] = {
396+
[ C(OP_READ) ] = {
397+
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
398+
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
399+
},
400+
[ C(OP_WRITE) ] = {
401+
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
402+
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
403+
},
404+
[ C(OP_PREFETCH) ] = {
405+
[ C(RESULT_ACCESS) ] = 0x0,
406+
[ C(RESULT_MISS) ] = 0x0,
407+
},
408+
},
409+
};
410+
411+
static __initconst const u64 skl_hw_cache_extra_regs
412+
[PERF_COUNT_HW_CACHE_MAX]
413+
[PERF_COUNT_HW_CACHE_OP_MAX]
414+
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
415+
{
416+
[ C(LL ) ] = {
417+
[ C(OP_READ) ] = {
418+
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
419+
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
420+
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
421+
SKL_L3_MISS|SKL_ANY_SNOOP|
422+
SKL_SUPPLIER_NONE,
423+
},
424+
[ C(OP_WRITE) ] = {
425+
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
426+
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
427+
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
428+
SKL_L3_MISS|SKL_ANY_SNOOP|
429+
SKL_SUPPLIER_NONE,
430+
},
431+
[ C(OP_PREFETCH) ] = {
432+
[ C(RESULT_ACCESS) ] = 0x0,
433+
[ C(RESULT_MISS) ] = 0x0,
434+
},
435+
},
436+
[ C(NODE) ] = {
437+
[ C(OP_READ) ] = {
438+
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
439+
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
440+
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
441+
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
442+
},
443+
[ C(OP_WRITE) ] = {
444+
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
445+
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
446+
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
447+
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
448+
},
449+
[ C(OP_PREFETCH) ] = {
450+
[ C(RESULT_ACCESS) ] = 0x0,
451+
[ C(RESULT_MISS) ] = 0x0,
452+
},
453+
},
454+
};
455+
247456
#define SNB_DMND_DATA_RD (1ULL << 0)
248457
#define SNB_DMND_RFO (1ULL << 1)
249458
#define SNB_DMND_IFETCH (1ULL << 2)
@@ -3278,6 +3487,29 @@ __init int intel_pmu_init(void)
32783487
pr_cont("Broadwell events, ");
32793488
break;
32803489

3490+
case 78: /* 14nm Skylake Mobile */
3491+
case 94: /* 14nm Skylake Desktop */
3492+
x86_pmu.late_ack = true;
3493+
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
3494+
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
3495+
intel_pmu_lbr_init_skl();
3496+
3497+
x86_pmu.event_constraints = intel_skl_event_constraints;
3498+
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
3499+
x86_pmu.extra_regs = intel_skl_extra_regs;
3500+
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
3501+
/* all extra regs are per-cpu when HT is on */
3502+
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
3503+
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
3504+
3505+
x86_pmu.hw_config = hsw_hw_config;
3506+
x86_pmu.get_event_constraints = hsw_get_event_constraints;
3507+
x86_pmu.cpu_events = hsw_events_attrs;
3508+
WARN_ON(!x86_pmu.format_attrs);
3509+
x86_pmu.cpu_events = hsw_events_attrs;
3510+
pr_cont("Skylake events, ");
3511+
break;
3512+
32813513
default:
32823514
switch (x86_pmu.version) {
32833515
case 1:

arch/x86/kernel/cpu/perf_event_intel_ds.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,28 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
688688
EVENT_CONSTRAINT_END
689689
};
690690

691+
/*
 * Skylake PEBS constraints.  PREC_DIST must use counter 1; everything
 * else may run on any general counter, and the trailing catch-all
 * allows plain (flag-less) PEBS on all events.
 */
struct event_constraint intel_skl_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};
691713
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
692714
{
693715
struct event_constraint *c;

arch/x86/kernel/cpu/perf_event_intel_lbr.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,26 @@ void intel_pmu_lbr_init_hsw(void)
973973
pr_cont("16-deep LBR, ");
974974
}
975975

976+
/* skylake */
977+
__init void intel_pmu_lbr_init_skl(void)
978+
{
979+
x86_pmu.lbr_nr = 32;
980+
x86_pmu.lbr_tos = MSR_LBR_TOS;
981+
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
982+
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
983+
984+
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
985+
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
986+
987+
/*
988+
* SW branch filter usage:
989+
* - support syscall, sysret capture.
990+
* That requires LBR_FAR but that means far
991+
* jmp need to be filtered out
992+
*/
993+
pr_cont("32-deep LBR, ");
994+
}
995+
976996
/* atom */
977997
void __init intel_pmu_lbr_init_atom(void)
978998
{

0 commit comments

Comments
 (0)