Skip to content

Commit d8fce2d

Browse files
committed
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also an uncore PMU driver fix and an uncore
  PMU driver hardware-enablement addition"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf probe: Fix segfault if passed with ''.
  perf report: Fix -T/--threads option to work again
  perf bench numa: Fix immediate meeting of convergence condition
  perf bench numa: Fixes of --quiet argument
  perf bench futex: Fix hung wakeup tasks after requeueing
  perf probe: Fix bug with global variables handling
  perf top: Fix a segfault when kernel map is restricted.
  tools lib traceevent: Fix build failure on 32-bit arch
  perf kmem: Fix compiles on RHEL6/OL6
  tools lib api: Undefine _FORTIFY_SOURCE before setting it
  perf kmem: Consistently use PRIu64 for printing u64 values
  perf trace: Disable events and drain events when forked workload ends
  perf trace: Enable events when doing system wide tracing and starting a workload
  perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
  perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs
  perf/x86/intel: Add cpu_(prepare|starting|dying) for core_pmu
2 parents 02f0f57 + 74f40c1 commit d8fce2d

File tree

13 files changed

+114
-77
lines changed

13 files changed

+114
-77
lines changed

arch/x86/kernel/cpu/perf_event_intel.c

Lines changed: 38 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
25332533
return x86_event_sysfs_show(page, config, event);
25342534
}
25352535

2536-
static __initconst const struct x86_pmu core_pmu = {
2537-
.name = "core",
2538-
.handle_irq = x86_pmu_handle_irq,
2539-
.disable_all = x86_pmu_disable_all,
2540-
.enable_all = core_pmu_enable_all,
2541-
.enable = core_pmu_enable_event,
2542-
.disable = x86_pmu_disable_event,
2543-
.hw_config = x86_pmu_hw_config,
2544-
.schedule_events = x86_schedule_events,
2545-
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2546-
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2547-
.event_map = intel_pmu_event_map,
2548-
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
2549-
.apic = 1,
2550-
/*
2551-
* Intel PMCs cannot be accessed sanely above 32 bit width,
2552-
* so we install an artificial 1<<31 period regardless of
2553-
* the generic event period:
2554-
*/
2555-
.max_period = (1ULL << 31) - 1,
2556-
.get_event_constraints = intel_get_event_constraints,
2557-
.put_event_constraints = intel_put_event_constraints,
2558-
.event_constraints = intel_core_event_constraints,
2559-
.guest_get_msrs = core_guest_get_msrs,
2560-
.format_attrs = intel_arch_formats_attr,
2561-
.events_sysfs_show = intel_event_sysfs_show,
2562-
};
2563-
25642536
struct intel_shared_regs *allocate_shared_regs(int cpu)
25652537
{
25662538
struct intel_shared_regs *regs;
@@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
27432715
NULL,
27442716
};
27452717

2718+
static __initconst const struct x86_pmu core_pmu = {
2719+
.name = "core",
2720+
.handle_irq = x86_pmu_handle_irq,
2721+
.disable_all = x86_pmu_disable_all,
2722+
.enable_all = core_pmu_enable_all,
2723+
.enable = core_pmu_enable_event,
2724+
.disable = x86_pmu_disable_event,
2725+
.hw_config = x86_pmu_hw_config,
2726+
.schedule_events = x86_schedule_events,
2727+
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2728+
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2729+
.event_map = intel_pmu_event_map,
2730+
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
2731+
.apic = 1,
2732+
/*
2733+
* Intel PMCs cannot be accessed sanely above 32-bit width,
2734+
* so we install an artificial 1<<31 period regardless of
2735+
* the generic event period:
2736+
*/
2737+
.max_period = (1ULL<<31) - 1,
2738+
.get_event_constraints = intel_get_event_constraints,
2739+
.put_event_constraints = intel_put_event_constraints,
2740+
.event_constraints = intel_core_event_constraints,
2741+
.guest_get_msrs = core_guest_get_msrs,
2742+
.format_attrs = intel_arch_formats_attr,
2743+
.events_sysfs_show = intel_event_sysfs_show,
2744+
2745+
/*
2746+
* Virtual (or funny metal) CPU can define x86_pmu.extra_regs
2747+
* together with PMU version 1 and thus be using core_pmu with
2748+
* shared_regs. We need following callbacks here to allocate
2749+
* it properly.
2750+
*/
2751+
.cpu_prepare = intel_pmu_cpu_prepare,
2752+
.cpu_starting = intel_pmu_cpu_starting,
2753+
.cpu_dying = intel_pmu_cpu_dying,
2754+
};
2755+
27462756
static __initconst const struct x86_pmu intel_pmu = {
27472757
.name = "Intel",
27482758
.handle_irq = intel_pmu_handle_irq,

arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
/* Nehalem/SandBridge/Haswell uncore support */
22
#include "perf_event_intel_uncore.h"
33

4+
/* Uncore IMC PCI IDs */
5+
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
6+
#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
7+
#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
8+
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
9+
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
10+
411
/* SNB event control */
512
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
613
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
@@ -472,6 +479,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
472479
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
473480
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
474481
},
482+
{ /* IMC */
483+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
484+
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
485+
},
475486
{ /* end: all zeroes */ },
476487
};
477488

@@ -502,6 +513,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
502513
IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
503514
IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
504515
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
516+
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
505517
{ /* end marker */ }
506518
};
507519

include/linux/pci_ids.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2541,10 +2541,6 @@
25412541

25422542
#define PCI_VENDOR_ID_INTEL 0x8086
25432543
#define PCI_DEVICE_ID_INTEL_EESSC 0x0008
2544-
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
2545-
#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
2546-
#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
2547-
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
25482544
#define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320
25492545
#define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321
25502546
#define PCI_DEVICE_ID_INTEL_PXH_0 0x0329

tools/lib/api/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ MAKEFLAGS += --no-print-directory
1616
LIBFILE = $(OUTPUT)libapi.a
1717

1818
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
19-
CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC
19+
CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
2020
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
2121

2222
RM = rm -f

tools/lib/traceevent/event-parse.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3865,7 +3865,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
38653865
} else if (el_size == 4) {
38663866
trace_seq_printf(s, "%u", *(uint32_t *)num);
38673867
} else if (el_size == 8) {
3868-
trace_seq_printf(s, "%lu", *(uint64_t *)num);
3868+
trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
38693869
} else {
38703870
trace_seq_printf(s, "BAD SIZE:%d 0x%x",
38713871
el_size, *(uint8_t *)num);

tools/perf/bench/futex-requeue.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv,
132132
if (!fshared)
133133
futex_flag = FUTEX_PRIVATE_FLAG;
134134

135+
if (nrequeue > nthreads)
136+
nrequeue = nthreads;
137+
135138
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
136139
"%d at a time.\n\n", getpid(), nthreads,
137140
fshared ? "shared":"private", &futex1, &futex2, nrequeue);
@@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv,
161164

162165
/* Ok, all threads are patiently blocked, start requeueing */
163166
gettimeofday(&start, NULL);
164-
for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) {
167+
while (nrequeued < nthreads) {
165168
/*
166169
* Do not wakeup any tasks blocked on futex1, allowing
167170
* us to really measure futex_wait functionality.
168171
*/
169-
futex_cmp_requeue(&futex1, 0, &futex2, 0,
170-
nrequeue, futex_flag);
172+
nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
173+
nrequeue, futex_flag);
171174
}
175+
172176
gettimeofday(&end, NULL);
173177
timersub(&end, &start, &runtime);
174178

175-
if (nrequeued > nthreads)
176-
nrequeued = nthreads;
177-
178179
update_stats(&requeued_stats, nrequeued);
179180
update_stats(&requeuetime_stats, runtime.tv_usec);
180181

@@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv,
184185
}
185186

186187
/* everybody should be blocked on futex2, wake'em up */
187-
nrequeued = futex_wake(&futex2, nthreads, futex_flag);
188+
nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
188189
if (nthreads != nrequeued)
189190
warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
190191

tools/perf/bench/numa.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ static const struct option options[] = {
180180
OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
181181
OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
182182
OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
183-
OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"),
183+
OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"),
184184
OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
185185

186186
/* Special option string parsing callbacks: */
@@ -828,6 +828,9 @@ static int count_process_nodes(int process_nr)
828828
td = g->threads + task_nr;
829829

830830
node = numa_node_of_cpu(td->curr_cpu);
831+
if (node < 0) /* curr_cpu was likely still -1 */
832+
return 0;
833+
831834
node_present[node] = 1;
832835
}
833836

@@ -882,6 +885,11 @@ static void calc_convergence_compression(int *strong)
882885
for (p = 0; p < g->p.nr_proc; p++) {
883886
unsigned int nodes = count_process_nodes(p);
884887

888+
if (!nodes) {
889+
*strong = 0;
890+
return;
891+
}
892+
885893
nodes_min = min(nodes, nodes_min);
886894
nodes_max = max(nodes, nodes_max);
887895
}
@@ -1395,7 +1403,7 @@ static void print_res(const char *name, double val,
13951403
if (!name)
13961404
name = "main,";
13971405

1398-
if (g->p.show_quiet)
1406+
if (!g->p.show_quiet)
13991407
printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
14001408
else
14011409
printf(" %14.3f %s\n", val, txt_long);

tools/perf/builtin-kmem.c

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
319319
return 0;
320320
}
321321

322-
static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
322+
static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
323323
{
324324
struct rb_node **node = &page_alloc_tree.rb_node;
325325
struct rb_node *parent = NULL;
@@ -331,7 +331,7 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
331331
parent = *node;
332332
data = rb_entry(*node, struct page_stat, node);
333333

334-
cmp = page_stat_cmp(data, stat);
334+
cmp = page_stat_cmp(data, pstat);
335335
if (cmp < 0)
336336
node = &parent->rb_left;
337337
else if (cmp > 0)
@@ -345,10 +345,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
345345

346346
data = zalloc(sizeof(*data));
347347
if (data != NULL) {
348-
data->page = stat->page;
349-
data->order = stat->order;
350-
data->gfp_flags = stat->gfp_flags;
351-
data->migrate_type = stat->migrate_type;
348+
data->page = pstat->page;
349+
data->order = pstat->order;
350+
data->gfp_flags = pstat->gfp_flags;
351+
data->migrate_type = pstat->migrate_type;
352352

353353
rb_link_node(&data->node, parent, node);
354354
rb_insert_color(&data->node, &page_alloc_tree);
@@ -375,7 +375,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
375375
unsigned int migrate_type = perf_evsel__intval(evsel, sample,
376376
"migratetype");
377377
u64 bytes = kmem_page_size << order;
378-
struct page_stat *stat;
378+
struct page_stat *pstat;
379379
struct page_stat this = {
380380
.order = order,
381381
.gfp_flags = gfp_flags,
@@ -401,21 +401,21 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
401401
* This is to find the current page (with correct gfp flags and
402402
* migrate type) at free event.
403403
*/
404-
stat = search_page(page, true);
405-
if (stat == NULL)
404+
pstat = search_page(page, true);
405+
if (pstat == NULL)
406406
return -ENOMEM;
407407

408-
stat->order = order;
409-
stat->gfp_flags = gfp_flags;
410-
stat->migrate_type = migrate_type;
408+
pstat->order = order;
409+
pstat->gfp_flags = gfp_flags;
410+
pstat->migrate_type = migrate_type;
411411

412412
this.page = page;
413-
stat = search_page_alloc_stat(&this, true);
414-
if (stat == NULL)
413+
pstat = search_page_alloc_stat(&this, true);
414+
if (pstat == NULL)
415415
return -ENOMEM;
416416

417-
stat->nr_alloc++;
418-
stat->alloc_bytes += bytes;
417+
pstat->nr_alloc++;
418+
pstat->alloc_bytes += bytes;
419419

420420
order_stats[order][migrate_type]++;
421421

@@ -428,7 +428,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
428428
u64 page;
429429
unsigned int order = perf_evsel__intval(evsel, sample, "order");
430430
u64 bytes = kmem_page_size << order;
431-
struct page_stat *stat;
431+
struct page_stat *pstat;
432432
struct page_stat this = {
433433
.order = order,
434434
};
@@ -441,8 +441,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
441441
nr_page_frees++;
442442
total_page_free_bytes += bytes;
443443

444-
stat = search_page(page, false);
445-
if (stat == NULL) {
444+
pstat = search_page(page, false);
445+
if (pstat == NULL) {
446446
pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
447447
page, order);
448448

@@ -453,18 +453,18 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
453453
}
454454

455455
this.page = page;
456-
this.gfp_flags = stat->gfp_flags;
457-
this.migrate_type = stat->migrate_type;
456+
this.gfp_flags = pstat->gfp_flags;
457+
this.migrate_type = pstat->migrate_type;
458458

459-
rb_erase(&stat->node, &page_tree);
460-
free(stat);
459+
rb_erase(&pstat->node, &page_tree);
460+
free(pstat);
461461

462-
stat = search_page_alloc_stat(&this, false);
463-
if (stat == NULL)
462+
pstat = search_page_alloc_stat(&this, false);
463+
if (pstat == NULL)
464464
return -ENOENT;
465465

466-
stat->nr_free++;
467-
stat->free_bytes += bytes;
466+
pstat->nr_free++;
467+
pstat->free_bytes += bytes;
468468

469469
return 0;
470470
}
@@ -640,9 +640,9 @@ static void print_page_summary(void)
640640
nr_page_frees, total_page_free_bytes / 1024);
641641
printf("\n");
642642

643-
printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
643+
printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
644644
nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
645-
printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
645+
printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
646646
nr_page_allocs - nr_alloc_freed,
647647
(total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
648648
printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests",

tools/perf/builtin-report.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
329329
fprintf(stdout, "\n\n");
330330
}
331331

332-
if (sort_order == default_sort_order &&
332+
if (sort_order == NULL &&
333333
parent_pattern == default_parent_pattern) {
334334
fprintf(stdout, "#\n# (%s)\n#\n", help);
335335

tools/perf/builtin-top.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
733733
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
734734
"Check /proc/sys/kernel/kptr_restrict.\n\n"
735735
"Kernel%s samples will not be resolved.\n",
736-
!RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
736+
al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
737737
" modules" : "");
738738
if (use_browser <= 0)
739739
sleep(5);

0 commit comments

Comments
 (0)