|
| 1 | +/* Driver for Intel Xeon Phi "Knights Corner" PMU */ |
| 2 | + |
| 3 | +#include <linux/perf_event.h> |
| 4 | +#include <linux/types.h> |
| 5 | + |
| 6 | +#include "perf_event.h" |
| 7 | + |
/*
 * Map of the generic PERF_COUNT_HW_* event ids to KNC native event
 * select codes (low byte: event code, next byte: unit mask).
 */
static const u64 knc_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x002a,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x0016,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0028,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0029,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0012,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x002b,
};
| 17 | + |
/*
 * Generic-cache-event to KNC native event code table, indexed by
 * (cache, operation, result).  Per the usual x86 perf convention,
 * -1 marks an unsupported combination and 0 marks one with no
 * event available — TODO confirm against the core's table handling.
 */
static __initconst u64 knc_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
	[ C(L1D) ] = {
		[ C(OP_READ) ] = {
			/* On Xeon Phi event "0" is a valid DATA_READ */
			/*   (L1 Data Cache Reads) Instruction. */
			/* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
			/* bit will always be set in x86_pmu_hw_config(). */
			[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
						/* DATA_READ */
			[ C(RESULT_MISS)   ] = 0x0003,	/* DATA_READ_MISS */
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE */
			[ C(RESULT_MISS)   ] = 0x0004,	/* DATA_WRITE_MISS */
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = 0x0011,	/* L1_DATA_PF1 */
			[ C(RESULT_MISS)   ] = 0x001c,	/* L1_DATA_PF1_MISS */
		},
	},
	[ C(L1I ) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ */
			[ C(RESULT_MISS)   ] = 0x000e,	/* CODE_CACHE_MISS */
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS)   ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = 0x0,
			[ C(RESULT_MISS)   ] = 0x0,
		},
	},
	[ C(LL  ) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0,
			[ C(RESULT_MISS)   ] = 0x10cb,	/* L2_READ_MISS */
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = 0x10cc,	/* L2_WRITE_HIT */
			[ C(RESULT_MISS)   ] = 0,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = 0x10fc,	/* L2_DATA_PF2 */
			[ C(RESULT_MISS)   ] = 0x10fe,	/* L2_DATA_PF2_MISS */
		},
	},
	[ C(DTLB) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
						/* DATA_READ */
						/* see note on L1 OP_READ */
			[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE */
			[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = 0x0,
			[ C(RESULT_MISS)   ] = 0x0,
		},
	},
	[ C(ITLB) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ */
			[ C(RESULT_MISS)   ] = 0x000d,	/* CODE_PAGE_WALK */
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS)   ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS)   ] = -1,
		},
	},
	[ C(BPU ) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0x0012,	/* BRANCHES */
			[ C(RESULT_MISS)   ] = 0x002b,	/* BRANCHES_MISPREDICTED */
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS)   ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS)   ] = -1,
		},
	},
};
| 115 | + |
| 116 | + |
/*
 * Translate a generic PERF_COUNT_HW_* index into the KNC native event
 * code.  No bounds check here — presumably the core rejects
 * attr->config >= x86_pmu.max_events before calling; verify in the
 * x86 perf core.
 */
static u64 knc_pmu_event_map(int hw_event)
{
	return knc_perfmon_event_map[hw_event];
}
| 121 | + |
/*
 * Scheduling constraints: each of these events appears to be usable
 * only on one counter (mask 0x1) — the second macro argument is the
 * allowed-counter bitmask.
 */
static struct event_constraint knc_event_constraints[] =
{
	INTEL_EVENT_CONSTRAINT(0xc3, 0x1),	/* HWP_L2HIT */
	INTEL_EVENT_CONSTRAINT(0xc4, 0x1),	/* HWP_L2MISS */
	INTEL_EVENT_CONSTRAINT(0xc8, 0x1),	/* L2_READ_HIT_E */
	INTEL_EVENT_CONSTRAINT(0xc9, 0x1),	/* L2_READ_HIT_M */
	INTEL_EVENT_CONSTRAINT(0xca, 0x1),	/* L2_READ_HIT_S */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),	/* L2_READ_MISS */
	INTEL_EVENT_CONSTRAINT(0xcc, 0x1),	/* L2_WRITE_HIT */
	INTEL_EVENT_CONSTRAINT(0xce, 0x1),	/* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
	INTEL_EVENT_CONSTRAINT(0xcf, 0x1),	/* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
	INTEL_EVENT_CONSTRAINT(0xd7, 0x1),	/* L2_VICTIM_REQ_WITH_DATA */
	INTEL_EVENT_CONSTRAINT(0xe3, 0x1),	/* SNP_HITM_BUNIT */
	INTEL_EVENT_CONSTRAINT(0xe6, 0x1),	/* SNP_HIT_L2 */
	INTEL_EVENT_CONSTRAINT(0xe7, 0x1),	/* SNP_HITM_L2 */
	INTEL_EVENT_CONSTRAINT(0xf1, 0x1),	/* L2_DATA_READ_MISS_CACHE_FILL */
	INTEL_EVENT_CONSTRAINT(0xf2, 0x1),	/* L2_DATA_WRITE_MISS_CACHE_FILL */
	INTEL_EVENT_CONSTRAINT(0xf6, 0x1),	/* L2_DATA_READ_MISS_MEM_FILL */
	INTEL_EVENT_CONSTRAINT(0xf7, 0x1),	/* L2_DATA_WRITE_MISS_MEM_FILL */
	INTEL_EVENT_CONSTRAINT(0xfc, 0x1),	/* L2_DATA_PF2 */
	INTEL_EVENT_CONSTRAINT(0xfd, 0x1),	/* L2_DATA_PF2_DROP */
	INTEL_EVENT_CONSTRAINT(0xfe, 0x1),	/* L2_DATA_PF2_MISS */
	INTEL_EVENT_CONSTRAINT(0xff, 0x1),	/* L2_DATA_HIT_INFLIGHT_PF2 */
	EVENT_CONSTRAINT_END
};
| 147 | + |
/* KNC-specific global performance-counter control/status MSR addresses. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f

/* Per-counter enable bits within MSR_KNC_IA32_PERF_GLOBAL_CTRL. */
#define KNC_ENABLE_COUNTER0			0x00000001
#define KNC_ENABLE_COUNTER1			0x00000002
| 154 | + |
| 155 | +static void knc_pmu_disable_all(void) |
| 156 | +{ |
| 157 | + u64 val; |
| 158 | + |
| 159 | + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); |
| 160 | + val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); |
| 161 | + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); |
| 162 | +} |
| 163 | + |
| 164 | +static void knc_pmu_enable_all(int added) |
| 165 | +{ |
| 166 | + u64 val; |
| 167 | + |
| 168 | + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); |
| 169 | + val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); |
| 170 | + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); |
| 171 | +} |
| 172 | + |
| 173 | +static inline void |
| 174 | +knc_pmu_disable_event(struct perf_event *event) |
| 175 | +{ |
| 176 | + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 177 | + struct hw_perf_event *hwc = &event->hw; |
| 178 | + u64 val; |
| 179 | + |
| 180 | + val = hwc->config; |
| 181 | + if (cpuc->enabled) |
| 182 | + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
| 183 | + |
| 184 | + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); |
| 185 | +} |
| 186 | + |
| 187 | +static void knc_pmu_enable_event(struct perf_event *event) |
| 188 | +{ |
| 189 | + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 190 | + struct hw_perf_event *hwc = &event->hw; |
| 191 | + u64 val; |
| 192 | + |
| 193 | + val = hwc->config; |
| 194 | + if (cpuc->enabled) |
| 195 | + val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
| 196 | + |
| 197 | + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); |
| 198 | +} |
| 199 | + |
/* sysfs "format" attributes: bit layout of the event-select config word. */
PMU_FORMAT_ATTR(event,	"config:0-7"	);
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
PMU_FORMAT_ATTR(inv,	"config:23"	);
PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
| 205 | + |
/* NULL-terminated attribute list exported via x86_pmu.format_attrs. */
static struct attribute *intel_knc_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
| 214 | + |
/* The KNC PMU description, copied into the global x86_pmu at init. */
static __initconst struct x86_pmu knc_pmu = {
	.name			= "knc",
	/*
	 * NOTE(review): the generic handler is used here, but overflow
	 * status/ack for this PMU lives in the KNC-specific
	 * MSR_KNC_IA32_PERF_GLOBAL_STATUS/_OVF_CONTROL MSRs defined above,
	 * which x86_pmu_handle_irq does not know about — confirm that
	 * overflow interrupts are actually acknowledged.
	 */
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= knc_pmu_disable_all,
	.enable_all		= knc_pmu_enable_all,
	.enable			= knc_pmu_enable_event,
	.disable		= knc_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_KNC_EVNTSEL0,
	.perfctr		= MSR_KNC_PERFCTR0,
	.event_map		= knc_pmu_event_map,
	.max_events             = ARRAY_SIZE(knc_perfmon_event_map),
	.apic			= 1,
	.max_period		= (1ULL << 31) - 1,
	.version		= 0,
	.num_counters		= 2,
	/* in theory 40 bits, early silicon is buggy though */
	.cntval_bits		= 32,
	.cntval_mask		= (1ULL << 32) - 1,
	.get_event_constraints	= x86_get_event_constraints,
	.event_constraints	= knc_event_constraints,
	.format_attrs		= intel_knc_formats_attr,
};
| 239 | + |
| 240 | +__init int knc_pmu_init(void) |
| 241 | +{ |
| 242 | + x86_pmu = knc_pmu; |
| 243 | + |
| 244 | + memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, |
| 245 | + sizeof(hw_cache_event_ids)); |
| 246 | + |
| 247 | + return 0; |
| 248 | +} |
0 commit comments