Skip to content

Commit cd65718

Browse files
author
Ingo Molnar
committed
Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/urgent fixes from Arnaldo Carvalho de Melo: * Add color overhead for stdio output buffer, which fixes --stdio output being chopped up on the hot (red) entries, fix from Jiri Olsa. * Get 'perf record -g -a sleep 1' working again, removing the need for -- separating perf options from the workload, restoring ages old behaviour, fix from Jiri Olsa. More patches allowing ~/.perfconfig setting up of default callchain collecting method ("fp" or "dwarf") left for next merge window. * Fixup mmap event consumption, where we were acking the consumption by writing the tail before actually accessing the event, which could lead to using overwritten records in things like 'perf record --call-graph'. From Zhouyi Zhou. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2 parents d17cccb + 8e50d38 commit cd65718

20 files changed

+151
-67
lines changed

tools/perf/Documentation/perf-record.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,20 @@ OPTIONS
9090
Number of mmap data pages. Must be a power of two.
9191

9292
-g::
93+
Enables call-graph (stack chain/backtrace) recording.
94+
9395
--call-graph::
94-
Do call-graph (stack chain/backtrace) recording.
96+
Set up and enable call-graph (stack chain/backtrace) recording,
97+
implies -g.
98+
99+
Allows specifying "fp" (frame pointer) or "dwarf"
100+
(DWARF's CFI - Call Frame Information) as the method to collect
101+
the information used to show the call graphs.
102+
103+
In some systems, where binaries are built with gcc
104+
-fomit-frame-pointer, using the "fp" method will produce bogus
105+
call graphs, using "dwarf", if available (perf tools linked to
106+
the libunwind library) should be used instead.
95107

96108
-q::
97109
--quiet::

tools/perf/Documentation/perf-top.txt

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -140,20 +140,12 @@ Default is to monitor all CPUS.
140140
--asm-raw::
141141
Show raw instruction encoding of assembly instructions.
142142

143-
-G [type,min,order]::
143+
-G::
144+
Enables call-graph (stack chain/backtrace) recording.
145+
144146
--call-graph::
145-
Display call chains using type, min percent threshold and order.
146-
type can be either:
147-
- flat: single column, linear exposure of call chains.
148-
- graph: use a graph tree, displaying absolute overhead rates.
149-
- fractal: like graph, but displays relative rates. Each branch of
150-
the tree is considered as a new profiled object.
151-
152-
order can be either:
153-
- callee: callee based call graph.
154-
- caller: inverted caller based call graph.
155-
156-
Default: fractal,0.5,callee.
147+
Set up and enable call-graph (stack chain/backtrace) recording,
148+
implies -G.
157149

158150
--ignore-callees=<regex>::
159151
Ignore callees of the function(s) matching the given regex.

tools/perf/builtin-kvm.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -888,11 +888,18 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
888888
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
889889
err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
890890
if (err) {
891+
perf_evlist__mmap_consume(kvm->evlist, idx);
891892
pr_err("Failed to parse sample\n");
892893
return -1;
893894
}
894895

895896
err = perf_session_queue_event(kvm->session, event, &sample, 0);
897+
/*
898+
* FIXME: Here we can't consume the event, as perf_session_queue_event will
899+
* point to it, and it'll get possibly overwritten by the kernel.
900+
*/
901+
perf_evlist__mmap_consume(kvm->evlist, idx);
902+
896903
if (err) {
897904
pr_err("Failed to enqueue sample: %d\n", err);
898905
return -1;

tools/perf/builtin-record.c

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -712,21 +712,12 @@ static int get_stack_size(char *str, unsigned long *_size)
712712
}
713713
#endif /* LIBUNWIND_SUPPORT */
714714

715-
int record_parse_callchain_opt(const struct option *opt,
716-
const char *arg, int unset)
715+
int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
717716
{
718-
struct perf_record_opts *opts = opt->value;
719717
char *tok, *name, *saveptr = NULL;
720718
char *buf;
721719
int ret = -1;
722720

723-
/* --no-call-graph */
724-
if (unset)
725-
return 0;
726-
727-
/* We specified default option if none is provided. */
728-
BUG_ON(!arg);
729-
730721
/* We need buffer that we know we can write to. */
731722
buf = malloc(strlen(arg) + 1);
732723
if (!buf)
@@ -764,27 +755,62 @@ int record_parse_callchain_opt(const struct option *opt,
764755
ret = get_stack_size(tok, &size);
765756
opts->stack_dump_size = size;
766757
}
767-
768-
if (!ret)
769-
pr_debug("callchain: stack dump size %d\n",
770-
opts->stack_dump_size);
771758
#endif /* LIBUNWIND_SUPPORT */
772759
} else {
773-
pr_err("callchain: Unknown -g option "
760+
pr_err("callchain: Unknown --call-graph option "
774761
"value: %s\n", arg);
775762
break;
776763
}
777764

778765
} while (0);
779766

780767
free(buf);
768+
return ret;
769+
}
770+
771+
static void callchain_debug(struct perf_record_opts *opts)
772+
{
773+
pr_debug("callchain: type %d\n", opts->call_graph);
781774

775+
if (opts->call_graph == CALLCHAIN_DWARF)
776+
pr_debug("callchain: stack dump size %d\n",
777+
opts->stack_dump_size);
778+
}
779+
780+
int record_parse_callchain_opt(const struct option *opt,
781+
const char *arg,
782+
int unset)
783+
{
784+
struct perf_record_opts *opts = opt->value;
785+
int ret;
786+
787+
/* --no-call-graph */
788+
if (unset) {
789+
opts->call_graph = CALLCHAIN_NONE;
790+
pr_debug("callchain: disabled\n");
791+
return 0;
792+
}
793+
794+
ret = record_parse_callchain(arg, opts);
782795
if (!ret)
783-
pr_debug("callchain: type %d\n", opts->call_graph);
796+
callchain_debug(opts);
784797

785798
return ret;
786799
}
787800

801+
int record_callchain_opt(const struct option *opt,
802+
const char *arg __maybe_unused,
803+
int unset __maybe_unused)
804+
{
805+
struct perf_record_opts *opts = opt->value;
806+
807+
if (opts->call_graph == CALLCHAIN_NONE)
808+
opts->call_graph = CALLCHAIN_FP;
809+
810+
callchain_debug(opts);
811+
return 0;
812+
}
813+
788814
static const char * const record_usage[] = {
789815
"perf record [<options>] [<command>]",
790816
"perf record [<options>] -- <command> [<options>]",
@@ -813,12 +839,12 @@ static struct perf_record record = {
813839
},
814840
};
815841

816-
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
842+
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
817843

818844
#ifdef LIBUNWIND_SUPPORT
819-
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
845+
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
820846
#else
821-
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
847+
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
822848
#endif
823849

824850
/*
@@ -858,9 +884,12 @@ const struct option record_options[] = {
858884
"number of mmap data pages"),
859885
OPT_BOOLEAN(0, "group", &record.opts.group,
860886
"put the counters into a counter group"),
861-
OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
862-
"mode[,dump_size]", record_callchain_help,
863-
&record_parse_callchain_opt, "fp"),
887+
OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
888+
NULL, "enables call-graph recording" ,
889+
&record_callchain_opt),
890+
OPT_CALLBACK(0, "call-graph", &record.opts,
891+
"mode[,dump_size]", record_callchain_help,
892+
&record_parse_callchain_opt),
864893
OPT_INCR('v', "verbose", &verbose,
865894
"be more verbose (show counter open errors, etc)"),
866895
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),

tools/perf/builtin-top.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
810810
ret = perf_evlist__parse_sample(top->evlist, event, &sample);
811811
if (ret) {
812812
pr_err("Can't parse sample, err = %d\n", ret);
813-
continue;
813+
goto next_event;
814814
}
815815

816816
evsel = perf_evlist__id2evsel(session->evlist, sample.id);
@@ -825,13 +825,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
825825
case PERF_RECORD_MISC_USER:
826826
++top->us_samples;
827827
if (top->hide_user_symbols)
828-
continue;
828+
goto next_event;
829829
machine = &session->machines.host;
830830
break;
831831
case PERF_RECORD_MISC_KERNEL:
832832
++top->kernel_samples;
833833
if (top->hide_kernel_symbols)
834-
continue;
834+
goto next_event;
835835
machine = &session->machines.host;
836836
break;
837837
case PERF_RECORD_MISC_GUEST_KERNEL:
@@ -847,7 +847,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
847847
*/
848848
/* Fall thru */
849849
default:
850-
continue;
850+
goto next_event;
851851
}
852852

853853

@@ -859,6 +859,8 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
859859
machine__process_event(machine, event);
860860
} else
861861
++session->stats.nr_unknown_events;
862+
next_event:
863+
perf_evlist__mmap_consume(top->evlist, idx);
862864
}
863865
}
864866

@@ -1016,16 +1018,16 @@ static int __cmd_top(struct perf_top *top)
10161018
}
10171019

10181020
static int
1019-
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1021+
callchain_opt(const struct option *opt, const char *arg, int unset)
10201022
{
1021-
/*
1022-
* --no-call-graph
1023-
*/
1024-
if (unset)
1025-
return 0;
1026-
10271023
symbol_conf.use_callchain = true;
1024+
return record_callchain_opt(opt, arg, unset);
1025+
}
10281026

1027+
static int
1028+
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1029+
{
1030+
symbol_conf.use_callchain = true;
10291031
return record_parse_callchain_opt(opt, arg, unset);
10301032
}
10311033

@@ -1106,9 +1108,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
11061108
"sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"),
11071109
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
11081110
"Show a column with the number of samples"),
1109-
OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
1110-
"mode[,dump_size]", record_callchain_help,
1111-
&parse_callchain_opt, "fp"),
1111+
OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts,
1112+
NULL, "enables call-graph recording",
1113+
&callchain_opt),
1114+
OPT_CALLBACK(0, "call-graph", &top.record_opts,
1115+
"mode[,dump_size]", record_callchain_help,
1116+
&parse_callchain_opt),
11121117
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
11131118
"ignore callees of these functions in call graphs",
11141119
report_parse_ignore_callees_opt),

tools/perf/builtin-trace.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
987987
err = perf_evlist__parse_sample(evlist, event, &sample);
988988
if (err) {
989989
fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
990-
continue;
990+
goto next_event;
991991
}
992992

993993
if (trace->base_time == 0)
@@ -1001,18 +1001,20 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
10011001
evsel = perf_evlist__id2evsel(evlist, sample.id);
10021002
if (evsel == NULL) {
10031003
fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1004-
continue;
1004+
goto next_event;
10051005
}
10061006

10071007
if (sample.raw_data == NULL) {
10081008
fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
10091009
perf_evsel__name(evsel), sample.tid,
10101010
sample.cpu, sample.raw_size);
1011-
continue;
1011+
goto next_event;
10121012
}
10131013

10141014
handler = evsel->handler.func;
10151015
handler(trace, evsel, &sample);
1016+
next_event:
1017+
perf_evlist__mmap_consume(evlist, i);
10161018

10171019
if (done)
10181020
goto out_unmap_evlist;

tools/perf/tests/code-reading.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ static int process_events(struct machine *machine, struct perf_evlist *evlist,
290290
for (i = 0; i < evlist->nr_mmaps; i++) {
291291
while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
292292
ret = process_event(machine, evlist, event, state);
293+
perf_evlist__mmap_consume(evlist, i);
293294
if (ret < 0)
294295
return ret;
295296
}

tools/perf/tests/keep-tracking.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
3636
(pid_t)event->comm.tid == getpid() &&
3737
strcmp(event->comm.comm, comm) == 0)
3838
found += 1;
39+
perf_evlist__mmap_consume(evlist, i);
3940
}
4041
}
4142
return found;

tools/perf/tests/mmap-basic.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ int test__basic_mmap(void)
122122
goto out_munmap;
123123
}
124124
nr_events[evsel->idx]++;
125+
perf_evlist__mmap_consume(evlist, 0);
125126
}
126127

127128
err = 0;

tools/perf/tests/open-syscall-tp-fields.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,10 @@ int test__syscall_open_tp_fields(void)
7777

7878
++nr_events;
7979

80-
if (type != PERF_RECORD_SAMPLE)
80+
if (type != PERF_RECORD_SAMPLE) {
81+
perf_evlist__mmap_consume(evlist, i);
8182
continue;
83+
}
8284

8385
err = perf_evsel__parse_sample(evsel, event, &sample);
8486
if (err) {

tools/perf/tests/perf-record.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,8 @@ int test__PERF_RECORD(void)
263263
type);
264264
++errs;
265265
}
266+
267+
perf_evlist__mmap_consume(evlist, i);
266268
}
267269
}
268270

tools/perf/tests/perf-time-to-tsc.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ int test__perf_time_to_tsc(void)
122122
if (event->header.type != PERF_RECORD_COMM ||
123123
(pid_t)event->comm.pid != getpid() ||
124124
(pid_t)event->comm.tid != getpid())
125-
continue;
125+
goto next_event;
126126

127127
if (strcmp(event->comm.comm, comm1) == 0) {
128128
CHECK__(perf_evsel__parse_sample(evsel, event,
@@ -134,6 +134,8 @@ int test__perf_time_to_tsc(void)
134134
&sample));
135135
comm2_time = sample.time;
136136
}
137+
next_event:
138+
perf_evlist__mmap_consume(evlist, i);
137139
}
138140
}
139141

tools/perf/tests/sw-clock.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
7878
struct perf_sample sample;
7979

8080
if (event->header.type != PERF_RECORD_SAMPLE)
81-
continue;
81+
goto next_event;
8282

8383
err = perf_evlist__parse_sample(evlist, event, &sample);
8484
if (err < 0) {
@@ -88,6 +88,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
8888

8989
total_periods += sample.period;
9090
nr_samples++;
91+
next_event:
92+
perf_evlist__mmap_consume(evlist, 0);
9193
}
9294

9395
if ((u64) nr_samples == total_periods) {

0 commit comments

Comments
 (0)