Skip to content

Commit efe8eaf

Browse files
author
Ingo Molnar
committed
Merge tag 'perf-core-for-mingo-4.20-20181025' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Introduce 'perf trace --max-events' for stopping 'perf trace' when that many syscalls (enter+exit), tracepoints or other events such as page faults take place. Support that as well on a per-event basis, e.g.: perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/ Will stop when 2 context switches, 4 block plugs, 1 block unplug and 3 net_dev_queue tracepoints take place. (Arnaldo Carvalho de Melo) - Poll for monitored tasks being alive in 'perf stat -p/-t', exiting when those tasks all terminate (Jiri Olsa) - Encode -k clockid frequency into perf.data to enable timestamps derived metrics conversion into wall clock time on reporting stage. (Alexey Budankov) - Improve Intel PT call graph from SQL database and GUI python scripts, including adopting the Qt MDI interface to allow for multiple subwindows for all the tables, helping in better visualizing the data in the SQL tables, also uses, when available, the Intel XED disassembler libraries to present the Intel PT data as x86 asm mnemonics. This last feature is not currently working in some cases, fix is being discussed (Adrian Hunter) - Implement a ftrace function_graph view in 'perf script' when processing hardware trace data such as Intel PT (Andi Kleen) - Better integration with the Intel XED disassembler, when available, in 'perf script' (Andi Kleen) - Some 'perf trace' drop refcount fixes (Arnaldo Carvalho de Melo) - Add Sparc support to 'perf annotate', jitdump (David Miller) - Fix PLT symbols entry/header sizes properly on Sparc (David Miller) - Fix generation of system call table failure with /tmp mounted with 'noexec' in arm64 (Hongxu Jia) - Allow extended console debug output in 'perf script' (Milian Wolff) - Flush output stream after events in 'perf script' verbose mode (Milian Wolff) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
2 parents 034bda1 + fe57120 commit efe8eaf

38 files changed

+2778
-396
lines changed

tools/lib/subcmd/parse-options.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
116116
case OPTION_INTEGER:
117117
case OPTION_UINTEGER:
118118
case OPTION_LONG:
119+
case OPTION_ULONG:
119120
case OPTION_U64:
120121
default:
121122
break;
@@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
166167
case OPTION_INTEGER:
167168
case OPTION_UINTEGER:
168169
case OPTION_LONG:
170+
case OPTION_ULONG:
169171
case OPTION_U64:
170172
default:
171173
break;
@@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
295297
return opterror(opt, "expects a numerical value", flags);
296298
return 0;
297299

300+
case OPTION_ULONG:
301+
if (unset) {
302+
*(unsigned long *)opt->value = 0;
303+
return 0;
304+
}
305+
if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
306+
*(unsigned long *)opt->value = opt->defval;
307+
return 0;
308+
}
309+
if (get_arg(p, opt, flags, &arg))
310+
return -1;
311+
*(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
312+
if (*s)
313+
return opterror(opt, "expects a numerical value", flags);
314+
return 0;
315+
298316
case OPTION_U64:
299317
if (unset) {
300318
*(u64 *)opt->value = 0;
@@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
703721
case OPTION_ARGUMENT:
704722
break;
705723
case OPTION_LONG:
724+
case OPTION_ULONG:
706725
case OPTION_U64:
707726
case OPTION_INTEGER:
708727
case OPTION_UINTEGER:

tools/lib/subcmd/parse-options.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ enum parse_opt_type {
2525
OPTION_STRING,
2626
OPTION_INTEGER,
2727
OPTION_LONG,
28+
OPTION_ULONG,
2829
OPTION_CALLBACK,
2930
OPTION_U64,
3031
OPTION_UINTEGER,
@@ -133,6 +134,7 @@ struct option {
133134
#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
134135
#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
135136
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
137+
#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
136138
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
137139
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
138140
#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2+
For --xed the xed tool is needed. Here is how to install it:
3+
4+
$ git clone https://github.com/intelxed/mbuild.git mbuild
5+
$ git clone https://github.com/intelxed/xed
6+
$ cd xed
7+
$ ./mfile.py --share
8+
$ ./mfile.py examples
9+
$ sudo ./mfile.py --prefix=/usr/local install
10+
$ sudo ldconfig
11+
$ sudo cp obj/examples/xed /usr/local/bin
12+
13+
Basic xed testing:
14+
15+
$ xed | head -3
16+
ERROR: required argument(s) were missing
17+
Copyright (C) 2017, Intel Corporation. All rights reserved.
18+
XED version: [v10.0-328-g7d62c8c49b7b]
19+
$

tools/perf/Documentation/intel-pt.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ in transaction, respectively.
106106
While it is possible to create scripts to analyze the data, an alternative
107107
approach is available to export the data to a sqlite or postgresql database.
108108
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
109-
and to script call-graph-from-sql.py for an example of using the database.
109+
and to script exported-sql-viewer.py for an example of using the database.
110110

111111
There is also script intel-pt-events.py which provides an example of how to
112112
unpack the raw data for power events and PTWRITE.

tools/perf/Documentation/itrace.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
l synthesize last branch entries (use with i or x)
1212
s skip initial number of events
1313

14-
The default is all events i.e. the same as --itrace=ibxwpe
14+
The default is all events i.e. the same as --itrace=ibxwpe,
15+
except for perf script where it is --itrace=ce
1516

16-
In addition, the period (default 100000) for instructions events
17-
can be specified in units of:
17+
In addition, the period (default 100000, except for perf script where it is 1)
18+
for instructions events can be specified in units of:
1819

1920
i instructions
2021
t ticks

tools/perf/Documentation/perf-script.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,24 @@ include::itrace.txt[]
383383
will be printed. Each entry has function name and file/line. Enabled by
384384
default, disable with --no-inline.
385385

386+
--insn-trace::
387+
Show instruction stream for intel_pt traces. Combine with --xed to
388+
show disassembly.
389+
390+
--xed::
391+
Run xed disassembler on output. Requires installing the xed disassembler.
392+
393+
--call-trace::
394+
Show call stream for intel_pt traces. The CPUs are interleaved, but
395+
can be filtered with -C.
396+
397+
--call-ret-trace::
398+
Show call and return stream for intel_pt traces.
399+
400+
--graph-function::
401+
For itrace only show specified functions and their callees for
402+
itrace. Multiple functions can be separated by comma.
403+
386404
SEE ALSO
387405
--------
388406
linkperf:perf-record[1], linkperf:perf-script-perl[1],

tools/perf/Documentation/perf-trace.txt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
171171
--kernel-syscall-graph::
172172
Show the kernel callchains on the syscall exit path.
173173

174+
--max-events=N::
175+
Stop after processing N events. Note that strace-like events are considered
176+
only at exit time or when a syscall is interrupted, i.e. in those cases this
177+
option is equivalent to the number of lines printed.
178+
174179
--max-stack::
175180
Set the stack depth limit when parsing the callchain, anything
176181
beyond the specified depth will be ignored. Note that at this point
@@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults:
238243
As you can see, there was major pagefault in python process, from
239244
CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
240245

246+
Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
247+
248+
$ perf trace -e open* --max-events 4
249+
[root@jouet perf]# trace -e open* --max-events 4
250+
2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
251+
2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
252+
3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
253+
4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
254+
$
255+
256+
Trace the first minor page fault when running a workload:
257+
258+
# perf trace -F min --max-stack=7 --max-events 1 sleep 1
259+
0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
260+
__clear_user ([kernel.kallsyms])
261+
load_elf_binary ([kernel.kallsyms])
262+
search_binary_handler ([kernel.kallsyms])
263+
__do_execve_file.isra.33 ([kernel.kallsyms])
264+
__x64_sys_execve ([kernel.kallsyms])
265+
do_syscall_64 ([kernel.kallsyms])
266+
entry_SYSCALL_64 ([kernel.kallsyms])
267+
#
268+
269+
Trace the next minor page fault to take place on the first CPU:
270+
271+
# perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
272+
0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
273+
js::gc::FreeSpan::initAsEmpty (inlined)
274+
js::gc::Arena::setAsNotAllocated (inlined)
275+
js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
276+
js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
277+
js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
278+
js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
279+
js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
280+
js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
281+
js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
282+
JSThinInlineString::new_<(js::AllowGC)1> (inlined)
283+
AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
284+
js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
285+
[0x18b26e6bc2bd] (/tmp/perf-17136.map)
286+
#
287+
288+
Trace the next two sched:sched_switch events, four block:*_plug events, the
289+
next block:*_unplug and the next three net:*dev_queue events, this last one
290+
with a backtrace of at most 16 entries, system wide:
291+
292+
# perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
293+
0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
294+
0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
295+
254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
296+
__dev_queue_xmit ([kernel.kallsyms])
297+
273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
298+
__dev_queue_xmit ([kernel.kallsyms])
299+
274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
300+
__dev_queue_xmit ([kernel.kallsyms])
301+
2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
302+
2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
303+
4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
304+
8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
305+
8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
306+
#
307+
241308
SEE ALSO
242309
--------
243310
linkperf:perf-record[1], linkperf:perf-script[1]

tools/perf/arch/arm64/entry/syscalls/mksyscalltbl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ create_table_from_c()
2323
{
2424
local sc nr last_sc
2525

26-
create_table_exe=`mktemp /tmp/create-table-XXXXXX`
26+
create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
2727

2828
{
2929

tools/perf/arch/sparc/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
ifndef NO_DWARF
22
PERF_HAVE_DWARF_REGS := 1
33
endif
4+
5+
PERF_HAVE_JITDUMP := 1
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
/*
 * Check whether @cond is a SPARC integer branch condition suffix.
 *
 * @cond is the part of the mnemonic that follows the leading 'b' (the caller
 * passes name + 1), so "bne" arrives here as "ne" and a bare "b" arrives as
 * the empty string.
 *
 * Returns 1 when @cond is a known condition, 0 otherwise.
 */
static int is_branch_cond(const char *cond)
{
	/*
	 * Suffixes are listed without the leading 'b'.  "pos" and "z" used to
	 * be tested as "bpos" and "bz", which could never match what the
	 * caller passes, and "gu" (the counterpart of "lu") was missing.
	 */
	static const char * const conds[] = {
		"",				/* b   */
		"a",				/* ba  */
		"n", "ne", "nz", "neg",		/* bn, bne, bnz, bneg */
		"e", "eq", "z",			/* be, beq, bz */
		"g", "gt", "ge", "gu", "geu",	/* bg, bgt, bge, bgu, bgeu */
		"l", "lt", "le", "lu", "leu",	/* bl, blt, ble, blu, bleu */
		"cc", "cs",			/* bcc, bcs */
		"pos",				/* bpos */
		"vc", "vs",			/* bvc, bvs */
	};
	size_t i;

	for (i = 0; i < sizeof(conds) / sizeof(conds[0]); i++) {
		if (!strcmp(cond, conds[i]))
			return 1;
	}

	return 0;
}
62+
63+
/*
 * Check whether @cond is a SPARC branch-on-register condition suffix.
 *
 * @cond is the text following the "br" prefix of the mnemonic (the caller
 * passes it with both letters removed).  The accepted suffixes are
 * "z", "nz", "lz", "gz", "lez" and "gez".
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int is_branch_reg_cond(const char *cond)
{
	switch (cond[0]) {
	case 'z':
		/* "z" */
		return cond[1] == '\0';
	case 'n':
		/* "nz" */
		return cond[1] == 'z' && cond[2] == '\0';
	case 'g':
	case 'l':
		/* "gz" / "lz" */
		if (cond[1] == 'z')
			return cond[2] == '\0';
		/* "gez" / "lez" */
		return cond[1] == 'e' && cond[2] == 'z' && cond[3] == '\0';
	default:
		return 0;
	}
}
87+
88+
/*
 * Check whether @cond is a SPARC floating-point branch condition suffix.
 *
 * @cond is the text following the "fb" prefix of the mnemonic (the caller
 * passes name + 2); a bare "fb" therefore arrives as the empty string.
 *
 * Returns 1 when @cond is one of the accepted suffixes, 0 otherwise.
 */
static int is_branch_float_cond(const char *cond)
{
	static const char * const fconds[] = {
		/* no suffix */
		"",
		/* one letter */
		"a", "e", "z", "g", "l", "n", "o", "u",
		/* two letters */
		"ge", "le", "lg", "ne", "nz", "ue", "ug", "ul",
		/* three letters */
		"uge", "ule",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(fconds) / sizeof(fconds[0]); idx++) {
		if (strcmp(cond, fconds[idx]) == 0)
			return 1;
	}

	return 0;
}
119+
120+
static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
121+
{
122+
struct ins_ops *ops = NULL;
123+
124+
if (!strcmp(name, "call") ||
125+
!strcmp(name, "jmp") ||
126+
!strcmp(name, "jmpl")) {
127+
ops = &call_ops;
128+
} else if (!strcmp(name, "ret") ||
129+
!strcmp(name, "retl") ||
130+
!strcmp(name, "return")) {
131+
ops = &ret_ops;
132+
} else if (!strcmp(name, "mov")) {
133+
ops = &mov_ops;
134+
} else {
135+
if (name[0] == 'c' &&
136+
(name[1] == 'w' || name[1] == 'x'))
137+
name += 2;
138+
139+
if (name[0] == 'b') {
140+
const char *cond = name + 1;
141+
142+
if (cond[0] == 'r') {
143+
if (is_branch_reg_cond(cond + 1))
144+
ops = &jump_ops;
145+
} else if (is_branch_cond(cond)) {
146+
ops = &jump_ops;
147+
}
148+
} else if (name[0] == 'f' && name[1] == 'b') {
149+
if (is_branch_float_cond(name + 2))
150+
ops = &jump_ops;
151+
}
152+
}
153+
154+
if (ops)
155+
arch__associate_ins_ops(arch, name, ops);
156+
157+
return ops;
158+
}
159+
160+
/*
 * One-time SPARC setup of @arch for annotation: installs the SPARC
 * instruction-ops lookup and sets '#' as the objdump comment character.
 * Does nothing if @arch is already marked initialized.  @cpuid is unused.
 *
 * Always returns 0.
 */
static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
	if (arch->initialized)
		return 0;

	arch->objdump.comment_char = '#';
	arch->associate_instruction_ops = sparc__associate_instruction_ops;
	arch->initialized = true;

	return 0;
}

0 commit comments

Comments
 (0)