Skip to content

Commit 99f753f

Browse files
Andi Kleen authored and acmel committed
perf script: Implement --graph-function
Add an ftrace-style --graph-function argument to 'perf script' that allows printing itrace function calls only below a given function. This makes it easier to find the code of interest in a large trace.

% perf record -e intel_pt//k -a sleep 1
% perf script --graph-function group_sched_in --call-trace
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) group_sched_in
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) event_sched_in.isra.107
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_set_state.part.71
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_time
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_disable
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_log_itrace_start
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_userpage
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) calc_timer_values
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) sched_clock_cpu
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) arch_perf_update_userpage
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __fentry__
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) using_native_sched_clock
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) sched_clock_stable
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_enable
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) group_sched_in
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) event_sched_in.isra.107
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_set_state.part.71
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_update_time
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_pmu_disable
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_log_itrace_start
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_update_userpage
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) calc_timer_values
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) sched_clock_cpu
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) arch_perf_update_userpage
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __fentry__
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) using_native_sched_clock
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) sched_clock_stable

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/r/20180920180540.14039-5-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent d1b1552 commit 99f753f

File tree

4 files changed

+86
-19
lines changed

4 files changed

+86
-19
lines changed

tools/perf/Documentation/perf-script.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,10 @@ include::itrace.txt[]
397397
--call-ret-trace::
398398
Show call and return stream for intel_pt traces.
399399

400+
--graph-function::
401+
For itrace, only show the specified functions and their callees.
402+
Multiple functions can be separated by commas.
403+
400404
SEE ALSO
401405
--------
402406
linkperf:perf-record[1], linkperf:perf-script-perl[1],

tools/perf/builtin-script.c

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,14 +1105,42 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample,
11051105
return printed;
11061106
}
11071107

1108+
/*
 * Resolve the symbol name associated with a branch sample.
 *
 * For samples flagged as a call or trace begin, the name comes from
 * resolving the sample through thread__resolve() (only attempted when
 * sample_addr_correlates_sym() accepts the event attributes). For samples
 * flagged as a return or trace end, the name comes from the already
 * resolved location 'al'.
 *
 * Returns the symbol name, or NULL when no symbol was found. When a flagged
 * sample has no symbol, *ip is set to the raw address instead
 * (sample->addr for call/trace-begin, sample->ip for return/trace-end).
 * *ip is left untouched when a name is found or when none of the four
 * flags is set.
 */
static const char *resolve_branch_sym(struct perf_sample *sample,
1109+
struct perf_evsel *evsel,
1110+
struct thread *thread,
1111+
struct addr_location *al,
1112+
u64 *ip)
1113+
{
1114+
struct addr_location addr_al;
1115+
struct perf_event_attr *attr = &evsel->attr;
1116+
const char *name = NULL;
1117+
1118+
/* Call / trace begin: name the branch via thread__resolve(). */
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
1119+
if (sample_addr_correlates_sym(attr)) {
1120+
thread__resolve(thread, &addr_al, sample);
1121+
if (addr_al.sym)
1122+
name = addr_al.sym->name;
1123+
else
1124+
*ip = sample->addr;
1125+
} else {
1126+
*ip = sample->addr;
1127+
}
1128+
} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
/* Return / trace end: use the caller-supplied location 'al'. */
1129+
if (al->sym)
1130+
name = al->sym->name;
1131+
else
1132+
*ip = sample->ip;
1133+
}
1134+
return name;
1135+
}
1136+
11081137
static int perf_sample__fprintf_callindent(struct perf_sample *sample,
11091138
struct perf_evsel *evsel,
11101139
struct thread *thread,
11111140
struct addr_location *al, FILE *fp)
11121141
{
11131142
struct perf_event_attr *attr = &evsel->attr;
11141143
size_t depth = thread_stack__depth(thread);
1115-
struct addr_location addr_al;
11161144
const char *name = NULL;
11171145
static int spacing;
11181146
int len = 0;
@@ -1126,22 +1154,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
11261154
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
11271155
depth += 1;
11281156

1129-
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
1130-
if (sample_addr_correlates_sym(attr)) {
1131-
thread__resolve(thread, &addr_al, sample);
1132-
if (addr_al.sym)
1133-
name = addr_al.sym->name;
1134-
else
1135-
ip = sample->addr;
1136-
} else {
1137-
ip = sample->addr;
1138-
}
1139-
} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
1140-
if (al->sym)
1141-
name = al->sym->name;
1142-
else
1143-
ip = sample->ip;
1144-
}
1157+
name = resolve_branch_sym(sample, evsel, thread, al, &ip);
11451158

11461159
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
11471160
dlen += fprintf(fp, "(");
@@ -1647,6 +1660,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
16471660
}
16481661
}
16491662

1663+
/*
 * Decide whether a sample should be printed under --graph-function.
 *
 * Returns true unconditionally when symbol_conf.graph_function is not set.
 * Otherwise the per-thread state in thread->filter and
 * thread->filter_entry_depth is used:
 *  - While thread->filter is set, samples are shown as long as the current
 *    thread stack depth is greater than the depth recorded when the filter
 *    was entered; once the depth drops to that level or below, the filter
 *    is cleared and the sample is not shown.
 *  - While thread->filter is clear, the sample's symbol name is compared
 *    against each entry of the comma separated graph_function list. An
 *    exact match sets the filter, records the current depth and shows the
 *    sample; anything else (including an unresolved name) is not shown.
 */
static bool show_event(struct perf_sample *sample,
1664+
struct perf_evsel *evsel,
1665+
struct thread *thread,
1666+
struct addr_location *al)
1667+
{
1668+
int depth = thread_stack__depth(thread);
1669+
1670+
if (!symbol_conf.graph_function)
1671+
return true;
1672+
1673+
if (thread->filter) {
1674+
/* Back at (or above) the entry depth: stop showing samples. */
if (depth <= thread->filter_entry_depth) {
1675+
thread->filter = false;
1676+
return false;
1677+
}
1678+
return true;
1679+
} else {
1680+
const char *s = symbol_conf.graph_function;
1681+
/* Only needed as an out-parameter; its value is never read here. */
u64 ip;
1682+
const char *name = resolve_branch_sym(sample, evsel, thread, al,
1683+
&ip);
1684+
unsigned nlen;
1685+
1686+
if (!name)
1687+
return false;
1688+
nlen = strlen(name);
1689+
/* Walk the comma separated list one entry at a time. */
while (*s) {
1690+
unsigned len = strcspn(s, ",");
/* Require equal lengths so a list entry never matches as a mere prefix. */
1691+
if (nlen == len && !strncmp(name, s, len)) {
1692+
thread->filter = true;
1693+
thread->filter_entry_depth = depth;
1694+
return true;
1695+
}
1696+
s += len;
1697+
if (*s == ',')
1698+
s++;
1699+
}
1700+
return false;
1701+
}
1702+
}
1703+
16501704
static void process_event(struct perf_script *script,
16511705
struct perf_sample *sample, struct perf_evsel *evsel,
16521706
struct addr_location *al,
@@ -1661,6 +1715,9 @@ static void process_event(struct perf_script *script,
16611715
if (output[type].fields == 0)
16621716
return;
16631717

1718+
if (!show_event(sample, evsel, thread, al))
1719+
return;
1720+
16641721
++es->samples;
16651722

16661723
perf_sample__fprintf_start(sample, thread, evsel,
@@ -3237,6 +3294,8 @@ int cmd_script(int argc, const char **argv)
32373294
"Decode calls from from itrace", parse_call_trace),
32383295
OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
32393296
"Decode calls and returns from itrace", parse_callret_trace),
3297+
OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
3298+
"Only print symbols and callees with --call-trace/--call-ret-trace"),
32403299
OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
32413300
"Stop display of callgraph at these symbols"),
32423301
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@@ -3494,7 +3553,8 @@ int cmd_script(int argc, const char **argv)
34943553
script.session = session;
34953554
script__setup_sample_type(&script);
34963555

3497-
if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
3556+
if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
3557+
symbol_conf.graph_function)
34983558
itrace_synth_opts.thread_stack = true;
34993559

35003560
session->itrace_synth_opts = &itrace_synth_opts;

tools/perf/util/symbol.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ struct symbol_conf {
123123
const char *vmlinux_name,
124124
*kallsyms_name,
125125
*source_prefix,
126-
*field_sep;
126+
*field_sep,
127+
*graph_function;
127128
const char *default_guest_vmlinux_name,
128129
*default_guest_kallsyms,
129130
*default_guest_modules;

tools/perf/util/thread.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ struct thread {
4242
void *addr_space;
4343
struct unwind_libunwind_ops *unwind_libunwind_ops;
4444
#endif
45+
bool filter;
46+
int filter_entry_depth;
4547
};
4648

4749
struct machine;

0 commit comments

Comments
 (0)