Skip to content

Commit 655c6b9

Browse files
committed
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner: "A bunch of perf tooling fixes: - Make the Intel PT SQL viewer more robust - Make the Intel PT debug log more useful - Support weak groups in perf record so it's behaving the same way as perf stat - Display the LBR stats in callchain entries properly in perf top - Handle different PMU names with common prefix properly in perf stat - Start syscall augmenting in perf trace. Preparation for architecture independent eBPF instrumentation of syscalls. - Fix build breakage in JVMTI perf lib - Fix arm64 tools build failure wrt smp_load_{acquire,release}" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf tools: Do not zero sample_id_all for group members perf tools: Fix undefined symbol scnprintf in libperf-jvmti.so perf beauty: Use SRCARCH, ARCH=x86_64 must map to "x86" to find the headers perf intel-pt: Add MTC and CYC timestamps to debug log perf intel-pt: Add more event information to debug log perf scripts python: exported-sql-viewer.py: Fix table find when table re-ordered perf scripts python: exported-sql-viewer.py: Add help window perf scripts python: exported-sql-viewer.py: Add Selected branches report perf scripts python: exported-sql-viewer.py: Fall back to /usr/local/lib/libxed.so perf top: Display the LBR stats in callchain entry perf stat: Handle different PMU names with common prefix perf record: Support weak groups perf evlist: Move perf_evsel__reset_weak_group into evlist perf augmented_syscalls: Start collecting pathnames in the BPF program perf trace: Fix setting of augmented payload when using eBPF + raw_syscalls perf trace: When augmenting raw_syscalls plug raw_syscalls:sys_exit too perf examples bpf: Start augmenting raw_syscalls:sys_{start,exit} tools headers barrier: Fix arm64 tools build failure wrt smp_load_{acquire,release}
2 parents 08b5278 + 45fd808 commit 655c6b9

File tree

19 files changed

+820
-121
lines changed

19 files changed

+820
-121
lines changed

tools/arch/arm64/include/asm/barrier.h

Lines changed: 67 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -14,74 +14,75 @@
1414
#define wmb() asm volatile("dmb ishst" ::: "memory")
1515
#define rmb() asm volatile("dmb ishld" ::: "memory")
1616

17-
#define smp_store_release(p, v) \
18-
do { \
19-
union { typeof(*p) __val; char __c[1]; } __u = \
20-
{ .__val = (__force typeof(*p)) (v) }; \
21-
\
22-
switch (sizeof(*p)) { \
23-
case 1: \
24-
asm volatile ("stlrb %w1, %0" \
25-
: "=Q" (*p) \
26-
: "r" (*(__u8 *)__u.__c) \
27-
: "memory"); \
28-
break; \
29-
case 2: \
30-
asm volatile ("stlrh %w1, %0" \
31-
: "=Q" (*p) \
32-
: "r" (*(__u16 *)__u.__c) \
33-
: "memory"); \
34-
break; \
35-
case 4: \
36-
asm volatile ("stlr %w1, %0" \
37-
: "=Q" (*p) \
38-
: "r" (*(__u32 *)__u.__c) \
39-
: "memory"); \
40-
break; \
41-
case 8: \
42-
asm volatile ("stlr %1, %0" \
43-
: "=Q" (*p) \
44-
: "r" (*(__u64 *)__u.__c) \
45-
: "memory"); \
46-
break; \
47-
default: \
48-
/* Only to shut up gcc ... */ \
49-
mb(); \
50-
break; \
51-
} \
17+
#define smp_store_release(p, v) \
18+
do { \
19+
union { typeof(*p) __val; char __c[1]; } __u = \
20+
{ .__val = (v) }; \
21+
\
22+
switch (sizeof(*p)) { \
23+
case 1: \
24+
asm volatile ("stlrb %w1, %0" \
25+
: "=Q" (*p) \
26+
: "r" (*(__u8_alias_t *)__u.__c) \
27+
: "memory"); \
28+
break; \
29+
case 2: \
30+
asm volatile ("stlrh %w1, %0" \
31+
: "=Q" (*p) \
32+
: "r" (*(__u16_alias_t *)__u.__c) \
33+
: "memory"); \
34+
break; \
35+
case 4: \
36+
asm volatile ("stlr %w1, %0" \
37+
: "=Q" (*p) \
38+
: "r" (*(__u32_alias_t *)__u.__c) \
39+
: "memory"); \
40+
break; \
41+
case 8: \
42+
asm volatile ("stlr %1, %0" \
43+
: "=Q" (*p) \
44+
: "r" (*(__u64_alias_t *)__u.__c) \
45+
: "memory"); \
46+
break; \
47+
default: \
48+
/* Only to shut up gcc ... */ \
49+
mb(); \
50+
break; \
51+
} \
5252
} while (0)
5353

54-
#define smp_load_acquire(p) \
55-
({ \
56-
union { typeof(*p) __val; char __c[1]; } __u; \
57-
\
58-
switch (sizeof(*p)) { \
59-
case 1: \
60-
asm volatile ("ldarb %w0, %1" \
61-
: "=r" (*(__u8 *)__u.__c) \
62-
: "Q" (*p) : "memory"); \
63-
break; \
64-
case 2: \
65-
asm volatile ("ldarh %w0, %1" \
66-
: "=r" (*(__u16 *)__u.__c) \
67-
: "Q" (*p) : "memory"); \
68-
break; \
69-
case 4: \
70-
asm volatile ("ldar %w0, %1" \
71-
: "=r" (*(__u32 *)__u.__c) \
72-
: "Q" (*p) : "memory"); \
73-
break; \
74-
case 8: \
75-
asm volatile ("ldar %0, %1" \
76-
: "=r" (*(__u64 *)__u.__c) \
77-
: "Q" (*p) : "memory"); \
78-
break; \
79-
default: \
80-
/* Only to shut up gcc ... */ \
81-
mb(); \
82-
break; \
83-
} \
84-
__u.__val; \
54+
#define smp_load_acquire(p) \
55+
({ \
56+
union { typeof(*p) __val; char __c[1]; } __u = \
57+
{ .__c = { 0 } }; \
58+
\
59+
switch (sizeof(*p)) { \
60+
case 1: \
61+
asm volatile ("ldarb %w0, %1" \
62+
: "=r" (*(__u8_alias_t *)__u.__c) \
63+
: "Q" (*p) : "memory"); \
64+
break; \
65+
case 2: \
66+
asm volatile ("ldarh %w0, %1" \
67+
: "=r" (*(__u16_alias_t *)__u.__c) \
68+
: "Q" (*p) : "memory"); \
69+
break; \
70+
case 4: \
71+
asm volatile ("ldar %w0, %1" \
72+
: "=r" (*(__u32_alias_t *)__u.__c) \
73+
: "Q" (*p) : "memory"); \
74+
break; \
75+
case 8: \
76+
asm volatile ("ldar %0, %1" \
77+
: "=r" (*(__u64_alias_t *)__u.__c) \
78+
: "Q" (*p) : "memory"); \
79+
break; \
80+
default: \
81+
/* Only to shut up gcc ... */ \
82+
mb(); \
83+
break; \
84+
} \
85+
__u.__val; \
8586
})
8687

8788
#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */

tools/perf/Documentation/perf-list.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ counted. The following modifiers exist:
5555
S - read sample value (PERF_SAMPLE_READ)
5656
D - pin the event to the PMU
5757
W - group is weak and will fallback to non-group if not schedulable,
58-
only supported in 'perf stat' for now.
5958

6059
The 'p' modifier can be used for specifying how precise the instruction
6160
address should be. The 'p' modifier can be specified multiple times:

tools/perf/Makefile.perf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ SHELL = $(SHELL_PATH)
387387

388388
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
389389
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
390-
arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/
390+
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
391391

392392
beauty_outdir := $(OUTPUT)trace/beauty/generated
393393
beauty_ioctl_outdir := $(beauty_outdir)/ioctl

tools/perf/builtin-record.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,12 @@ static int record__open(struct record *rec)
391391
ui__warning("%s\n", msg);
392392
goto try_again;
393393
}
394-
394+
if ((errno == EINVAL || errno == EBADF) &&
395+
pos->leader != pos &&
396+
pos->weak_group) {
397+
pos = perf_evlist__reset_weak_group(evlist, pos);
398+
goto try_again;
399+
}
395400
rc = -errno;
396401
perf_evsel__open_strerror(pos, &opts->target,
397402
errno, msg, sizeof(msg));

tools/perf/builtin-stat.c

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -383,32 +383,6 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter)
383383
return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
384384
}
385385

386-
static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
387-
{
388-
struct perf_evsel *c2, *leader;
389-
bool is_open = true;
390-
391-
leader = evsel->leader;
392-
pr_debug("Weak group for %s/%d failed\n",
393-
leader->name, leader->nr_members);
394-
395-
/*
396-
* for_each_group_member doesn't work here because it doesn't
397-
* include the first entry.
398-
*/
399-
evlist__for_each_entry(evsel_list, c2) {
400-
if (c2 == evsel)
401-
is_open = false;
402-
if (c2->leader == leader) {
403-
if (is_open)
404-
perf_evsel__close(c2);
405-
c2->leader = c2;
406-
c2->nr_members = 0;
407-
}
408-
}
409-
return leader;
410-
}
411-
412386
static bool is_target_alive(struct target *_target,
413387
struct thread_map *threads)
414388
{
@@ -477,7 +451,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
477451
if ((errno == EINVAL || errno == EBADF) &&
478452
counter->leader != counter &&
479453
counter->weak_group) {
480-
counter = perf_evsel__reset_weak_group(counter);
454+
counter = perf_evlist__reset_weak_group(evsel_list, counter);
481455
goto try_again;
482456
}
483457

tools/perf/builtin-top.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,6 +1429,9 @@ int cmd_top(int argc, const char **argv)
14291429
}
14301430
}
14311431

1432+
if (opts->branch_stack && callchain_param.enabled)
1433+
symbol_conf.show_branchflag_count = true;
1434+
14321435
sort__mode = SORT_MODE__TOP;
14331436
/* display thread wants entries to be collapsed in a different tree */
14341437
perf_hpp_list.need_collapse = 1;

tools/perf/builtin-trace.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ struct trace {
108108
} stats;
109109
unsigned int max_stack;
110110
unsigned int min_stack;
111+
bool raw_augmented_syscalls;
111112
bool not_ev_qualifier;
112113
bool live;
113114
bool full_time;
@@ -1724,13 +1725,28 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
17241725
return printed;
17251726
}
17261727

1727-
static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size)
1728+
static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
17281729
{
17291730
void *augmented_args = NULL;
1731+
/*
1732+
* For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
1733+
* and there we get all 6 syscall args plus the tracepoint common
1734+
* fields (sizeof(long)) and the syscall_nr (another long). So we check
1735+
* if that is the case and if so don't look after the sc->args_size,
1736+
* but always after the full raw_syscalls:sys_enter payload, which is
1737+
* fixed.
1738+
*
1739+
* We'll revisit this later to pass s->args_size to the BPF augmenter
1740+
* (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
1741+
* copies only what we need for each syscall, like what happens when we
1742+
* use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
1743+
* traffic to just what is needed for each syscall.
1744+
*/
1745+
int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;
17301746

1731-
*augmented_args_size = sample->raw_size - sc->args_size;
1747+
*augmented_args_size = sample->raw_size - args_size;
17321748
if (*augmented_args_size > 0)
1733-
augmented_args = sample->raw_data + sc->args_size;
1749+
augmented_args = sample->raw_data + args_size;
17341750

17351751
return augmented_args;
17361752
}
@@ -1780,7 +1796,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
17801796
* here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
17811797
*/
17821798
if (evsel != trace->syscalls.events.sys_enter)
1783-
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
1799+
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
17841800
ttrace->entry_time = sample->time;
17851801
msg = ttrace->entry_str;
17861802
printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
@@ -1833,7 +1849,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
18331849
goto out_put;
18341850

18351851
args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1836-
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
1852+
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
18371853
syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
18381854
fprintf(trace->output, "%s", msg);
18391855
err = 0;
@@ -3501,7 +3517,15 @@ int cmd_trace(int argc, const char **argv)
35013517
evsel->handler = trace__sys_enter;
35023518

35033519
evlist__for_each_entry(trace.evlist, evsel) {
3520+
bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
3521+
3522+
if (raw_syscalls_sys_exit) {
3523+
trace.raw_augmented_syscalls = true;
3524+
goto init_augmented_syscall_tp;
3525+
}
3526+
35043527
if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
3528+
init_augmented_syscall_tp:
35053529
perf_evsel__init_augmented_syscall_tp(evsel);
35063530
perf_evsel__init_augmented_syscall_tp_ret(evsel);
35073531
evsel->handler = trace__sys_exit;

0 commit comments

Comments
 (0)