Skip to content

Commit 93d10af

Browse files
olsajiri authored and acmel committed
perf tools: Optimize sample parsing for ordered events
Currently when using ordered events we parse the sample twice (the
perf_evlist__parse_sample function). Once before we queue the sample
for sorting:

  perf_session__process_event
    perf_evlist__parse_sample(sample)
    perf_session__queue_event(sample.time)

And then when we deliver the sorted sample:

  ordered_events__deliver_event
    perf_evlist__parse_sample
    perf_session__deliver_event

We can skip the initial full sample parsing by using the
perf_evlist__parse_sample_timestamp function, which got introduced
earlier. The new path looks like:

  perf_session__process_event
    perf_evlist__parse_sample_timestamp
    perf_session__queue_event

  ordered_events__deliver_event
    perf_session__deliver_event
      perf_evlist__parse_sample

It saves some instructions and is slightly faster:

Before:

  Performance counter stats for './perf.old report --stdio' (5 runs):

     64,396,007,225      cycles:u                                      ( +- 0.97% )
    105,882,112,735      instructions:u   # 1.64 insn per cycle        ( +- 0.00% )

       21.618103465 seconds time elapsed                               ( +- 1.12% )

After:

  Performance counter stats for './perf report --stdio' (5 runs):

     60,567,807,182      cycles:u                                      ( +- 0.40% )
    104,853,333,514      instructions:u   # 1.73 insn per cycle        ( +- 0.00% )

       20.168895243 seconds time elapsed                               ( +- 0.32% )

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent dc83e13 commit 93d10af

File tree

2 files changed

+22
-27
lines changed

2 files changed

+22
-27
lines changed

tools/perf/builtin-kvm.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
741741
u64 *mmap_time)
742742
{
743743
union perf_event *event;
744-
struct perf_sample sample;
744+
u64 timestamp;
745745
s64 n = 0;
746746
int err;
747747

748748
*mmap_time = ULLONG_MAX;
749749
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
750-
err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
750+
err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
751751
if (err) {
752752
perf_evlist__mmap_consume(kvm->evlist, idx);
753753
pr_err("Failed to parse sample\n");
754754
return -1;
755755
}
756756

757-
err = perf_session__queue_event(kvm->session, event, sample.time, 0);
757+
err = perf_session__queue_event(kvm->session, event, timestamp, 0);
758758
/*
759759
* FIXME: Here we can't consume the event, as perf_session__queue_event will
760760
* point to it, and it'll get possibly overwritten by the kernel.
@@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
768768

769769
/* save time stamp of our first sample for this mmap */
770770
if (n == 0)
771-
*mmap_time = sample.time;
771+
*mmap_time = timestamp;
772772

773773
/* limit events per mmap handled all at once */
774774
n++;

tools/perf/util/session.c

Lines changed: 18 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727

2828
static int perf_session__deliver_event(struct perf_session *session,
2929
union perf_event *event,
30-
struct perf_sample *sample,
3130
struct perf_tool *tool,
3231
u64 file_offset);
3332

@@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)
107106
static int ordered_events__deliver_event(struct ordered_events *oe,
108107
struct ordered_event *event)
109108
{
110-
struct perf_sample sample;
111109
struct perf_session *session = container_of(oe, struct perf_session,
112110
ordered_events);
113-
int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
114-
115-
if (ret) {
116-
pr_err("Can't parse sample, err = %d\n", ret);
117-
return ret;
118-
}
119111

120-
return perf_session__deliver_event(session, event->event, &sample,
112+
return perf_session__deliver_event(session, event->event,
121113
session->tool, event->file_offset);
122114
}
123115

@@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines,
13281320

13291321
static int perf_session__deliver_event(struct perf_session *session,
13301322
union perf_event *event,
1331-
struct perf_sample *sample,
13321323
struct perf_tool *tool,
13331324
u64 file_offset)
13341325
{
1326+
struct perf_sample sample;
13351327
int ret;
13361328

1337-
ret = auxtrace__process_event(session, event, sample, tool);
1329+
ret = perf_evlist__parse_sample(session->evlist, event, &sample);
1330+
if (ret) {
1331+
pr_err("Can't parse sample, err = %d\n", ret);
1332+
return ret;
1333+
}
1334+
1335+
ret = auxtrace__process_event(session, event, &sample, tool);
13381336
if (ret < 0)
13391337
return ret;
13401338
if (ret > 0)
13411339
return 0;
13421340

13431341
return machines__deliver_event(&session->machines, session->evlist,
1344-
event, sample, tool, file_offset);
1342+
event, &sample, tool, file_offset);
13451343
}
13461344

13471345
static s64 perf_session__process_user_event(struct perf_session *session,
@@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session,
14951493
{
14961494
struct perf_evlist *evlist = session->evlist;
14971495
struct perf_tool *tool = session->tool;
1498-
struct perf_sample sample;
14991496
int ret;
15001497

15011498
if (session->header.needs_swap)
@@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session,
15091506
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
15101507
return perf_session__process_user_event(session, event, file_offset);
15111508

1512-
/*
1513-
* For all kernel events we get the sample data
1514-
*/
1515-
ret = perf_evlist__parse_sample(evlist, event, &sample);
1516-
if (ret)
1517-
return ret;
1518-
15191509
if (tool->ordered_events) {
1520-
ret = perf_session__queue_event(session, event, sample.time, file_offset);
1510+
u64 timestamp;
1511+
1512+
ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
1513+
if (ret)
1514+
return ret;
1515+
1516+
ret = perf_session__queue_event(session, event, timestamp, file_offset);
15211517
if (ret != -ETIME)
15221518
return ret;
15231519
}
15241520

1525-
return perf_session__deliver_event(session, event, &sample, tool,
1526-
file_offset);
1521+
return perf_session__deliver_event(session, event, tool, file_offset);
15271522
}
15281523

15291524
void perf_event_header__bswap(struct perf_event_header *hdr)

0 commit comments

Comments
 (0)