Skip to content

Commit 3911169

Browse files
Alexei Starovoitov authored and davem330 committed
samples: bpf: add bpf_perf_event_output example
Performance test and example of bpf_perf_event_output().
kprobe is attached to sys_write() and trivial bpf program streams
pid+cookie into userspace via PERF_COUNT_SW_BPF_OUTPUT event.

Usage:
$ sudo ./bld_x64/samples/bpf/trace_output
recv 2968913 events per sec

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent a43eec3 commit 3911169

File tree

4 files changed

+236
-0
lines changed

4 files changed

+236
-0
lines changed

samples/bpf/Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ hostprogs-y += tracex3
1313
hostprogs-y += tracex4
1414
hostprogs-y += tracex5
1515
hostprogs-y += tracex6
16+
hostprogs-y += trace_output
1617
hostprogs-y += lathist
1718

1819
test_verifier-objs := test_verifier.o libbpf.o
@@ -27,6 +28,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
2728
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
2829
tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
2930
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
31+
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
3032
lathist-objs := bpf_load.o libbpf.o lathist_user.o
3133

3234
# Tell kbuild to always build the programs
@@ -40,6 +42,7 @@ always += tracex3_kern.o
4042
always += tracex4_kern.o
4143
always += tracex5_kern.o
4244
always += tracex6_kern.o
45+
always += trace_output_kern.o
4346
always += tcbpf1_kern.o
4447
always += lathist_kern.o
4548

@@ -55,6 +58,7 @@ HOSTLOADLIBES_tracex3 += -lelf
5558
HOSTLOADLIBES_tracex4 += -lelf -lrt
5659
HOSTLOADLIBES_tracex5 += -lelf
5760
HOSTLOADLIBES_tracex6 += -lelf
61+
HOSTLOADLIBES_trace_output += -lelf -lrt
5862
HOSTLOADLIBES_lathist += -lelf
5963

6064
# point this to your LLVM backend with bpf support
@@ -64,3 +68,6 @@ $(obj)/%.o: $(src)/%.c
6468
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
6569
-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
6670
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
71+
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
72+
-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
73+
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s

samples/bpf/bpf_helpers.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
3737
(void *) BPF_FUNC_clone_redirect;
3838
static int (*bpf_redirect)(int ifindex, int flags) =
3939
(void *) BPF_FUNC_redirect;
40+
/* Call stub for the perf_event_output helper. Like the other helpers
 * declared in this header, the pointer value is the helper id
 * (BPF_FUNC_perf_event_output) cast to a function pointer rather than a
 * real address. The sample program calls it with the probe context, a
 * map, an index into that map, and a data pointer plus its size in bytes.
 */
static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
41+
(void *) BPF_FUNC_perf_event_output;
4042

4143
/* llvm builtin functions that eBPF C program may use to
4244
* emit BPF_LD_ABS and BPF_LD_IND instructions

samples/bpf/trace_output_kern.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include <linux/ptrace.h>
2+
#include <linux/version.h>
3+
#include <uapi/linux/bpf.h>
4+
#include "bpf_helpers.h"
5+
6+
/* Perf event array placed in the "maps" section. Keys are int-sized and
 * values are u32-sized; there is room for two entries. The user-space
 * half of this sample stores its perf event fd under key 0, and
 * bpf_prog1() emits its records through index 0.
 */
struct bpf_map_def SEC("maps") my_map = {
7+
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
8+
.key_size = sizeof(int),
9+
.value_size = sizeof(u32),
10+
.max_entries = 2,
11+
};
12+
13+
/*
 * Program placed in the "kprobe/sys_write" section. It builds a zeroed
 * { pid, cookie } record on the stack, fills in the value returned by
 * bpf_get_current_pid_tgid() and a fixed cookie, and passes the record
 * to bpf_perf_event_output() using index 0 of my_map.
 *
 * Always returns 0.
 */
SEC("kprobe/sys_write")
int bpf_prog1(struct pt_regs *ctx)
{
	struct S {
		u64 pid;
		u64 cookie;
	} sample;

	/* clear the whole record before any field is written */
	memset(&sample, 0, sizeof(sample));
	sample.cookie = 0x12345678;
	sample.pid = bpf_get_current_pid_tgid();

	bpf_perf_event_output(ctx, &my_map, 0, &sample, sizeof(sample));

	return 0;
}
29+
30+
/* Metadata emitted into the "license" and "version" sections of the
 * object file. NOTE(review): presumably read by the loader when the
 * program is installed - confirm against bpf_load.
 */
char _license[] SEC("license") = "GPL";
31+
u32 _version SEC("version") = LINUX_VERSION_CODE;

samples/bpf/trace_output_user.c

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/* This program is free software; you can redistribute it and/or
2+
* modify it under the terms of version 2 of the GNU General Public
3+
* License as published by the Free Software Foundation.
4+
*/
5+
#include <stdio.h>
6+
#include <unistd.h>
7+
#include <stdlib.h>
8+
#include <stdbool.h>
9+
#include <string.h>
10+
#include <fcntl.h>
11+
#include <poll.h>
12+
#include <sys/ioctl.h>
13+
#include <linux/perf_event.h>
14+
#include <linux/bpf.h>
15+
#include <errno.h>
16+
#include <assert.h>
17+
#include <sys/syscall.h>
18+
#include <sys/ioctl.h>
19+
#include <sys/mman.h>
20+
#include <time.h>
21+
#include <signal.h>
22+
#include "libbpf.h"
23+
#include "bpf_load.h"
24+
25+
/* perf event fd: opened by test_bpf_perf_event(), then mmap'ed and polled
 * from main().
 */
static int pmu_fd;
26+
27+
/* system page size, filled in by perf_event_mmap() via getpagesize() */
int page_size;
28+
/* number of data pages in the ring; the mapping is page_cnt + 1 pages */
int page_cnt = 8;
29+
/* start of the mmap'ed region, assigned by perf_event_mmap() */
volatile struct perf_event_mmap_page *header;
30+
31+
/* callback invoked by perf_event_read() for each PERF_RECORD_SAMPLE record,
 * with a pointer to the payload and the payload size
 */
typedef void (*print_fn)(void *data, int size);
32+
33+
static int perf_event_mmap(int fd)
34+
{
35+
void *base;
36+
int mmap_size;
37+
38+
page_size = getpagesize();
39+
mmap_size = page_size * (page_cnt + 1);
40+
41+
base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
42+
if (base == MAP_FAILED) {
43+
printf("mmap err\n");
44+
return -1;
45+
}
46+
47+
header = base;
48+
return 0;
49+
}
50+
51+
/*
 * Wait for @fd to become readable, giving up after 1000 ms.
 *
 * Returns the value returned by poll().
 */
static int perf_event_poll(int fd)
{
	const int timeout_ms = 1000;
	struct pollfd waiter = { 0 };

	waiter.fd = fd;
	waiter.events = POLLIN;

	return poll(&waiter, 1, timeout_ms);
}
57+
58+
struct perf_event_sample {
59+
struct perf_event_header header;
60+
__u32 size;
61+
char data[];
62+
};
63+
64+
void perf_event_read(print_fn fn)
65+
{
66+
__u64 data_tail = header->data_tail;
67+
__u64 data_head = header->data_head;
68+
__u64 buffer_size = page_cnt * page_size;
69+
void *base, *begin, *end;
70+
char buf[256];
71+
72+
asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
73+
if (data_head == data_tail)
74+
return;
75+
76+
base = ((char *)header) + page_size;
77+
78+
begin = base + data_tail % buffer_size;
79+
end = base + data_head % buffer_size;
80+
81+
while (begin != end) {
82+
struct perf_event_sample *e;
83+
84+
e = begin;
85+
if (begin + e->header.size > base + buffer_size) {
86+
long len = base + buffer_size - begin;
87+
88+
assert(len < e->header.size);
89+
memcpy(buf, begin, len);
90+
memcpy(buf + len, base, e->header.size - len);
91+
e = (void *) buf;
92+
begin = base + e->header.size - len;
93+
} else if (begin + e->header.size == base + buffer_size) {
94+
begin = base;
95+
} else {
96+
begin += e->header.size;
97+
}
98+
99+
if (e->header.type == PERF_RECORD_SAMPLE) {
100+
fn(e->data, e->size);
101+
} else if (e->header.type == PERF_RECORD_LOST) {
102+
struct {
103+
struct perf_event_header header;
104+
__u64 id;
105+
__u64 lost;
106+
} *lost = (void *) e;
107+
printf("lost %lld events\n", lost->lost);
108+
} else {
109+
printf("unknown event type=%d size=%d\n",
110+
e->header.type, e->header.size);
111+
}
112+
}
113+
114+
__sync_synchronize(); /* smp_mb() */
115+
header->data_tail = data_head;
116+
}
117+
118+
static __u64 time_get_ns(void)
119+
{
120+
struct timespec ts;
121+
122+
clock_gettime(CLOCK_MONOTONIC, &ts);
123+
return ts.tv_sec * 1000000000ull + ts.tv_nsec;
124+
}
125+
126+
/* time_get_ns() reading taken by main() just before the read loop starts */
static __u64 start_time;
127+
128+
/* number of received records after which print_bpf_output() prints the rate */
#define MAX_CNT 100000ll
129+
130+
static void print_bpf_output(void *data, int size)
131+
{
132+
static __u64 cnt;
133+
struct {
134+
__u64 pid;
135+
__u64 cookie;
136+
} *e = data;
137+
138+
if (e->cookie != 0x12345678) {
139+
printf("BUG pid %llx cookie %llx sized %d\n",
140+
e->pid, e->cookie, size);
141+
kill(0, SIGINT);
142+
}
143+
144+
cnt++;
145+
146+
if (cnt == MAX_CNT) {
147+
printf("recv %lld events per sec\n",
148+
MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
149+
kill(0, SIGINT);
150+
}
151+
}
152+
153+
static void test_bpf_perf_event(void)
154+
{
155+
struct perf_event_attr attr = {
156+
.sample_type = PERF_SAMPLE_RAW,
157+
.type = PERF_TYPE_SOFTWARE,
158+
.config = PERF_COUNT_SW_BPF_OUTPUT,
159+
};
160+
int key = 0;
161+
162+
pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
163+
164+
assert(pmu_fd >= 0);
165+
assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
166+
ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
167+
}
168+
169+
int main(int argc, char **argv)
170+
{
171+
char filename[256];
172+
FILE *f;
173+
174+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
175+
176+
if (load_bpf_file(filename)) {
177+
printf("%s", bpf_log_buf);
178+
return 1;
179+
}
180+
181+
test_bpf_perf_event();
182+
183+
if (perf_event_mmap(pmu_fd) < 0)
184+
return 1;
185+
186+
f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
187+
(void) f;
188+
189+
start_time = time_get_ns();
190+
for (;;) {
191+
perf_event_poll(pmu_fd);
192+
perf_event_read(print_bpf_output);
193+
}
194+
195+
return 0;
196+
}

0 commit comments

Comments
 (0)