Skip to content

Commit 1c14dc4

Browse files
committed
Merge branch 'bpf-add-support-for-sys-enter-exit-tracepoints'
Yonghong Song says: ==================== bpf: add support for sys_{enter|exit}_* tracepoints Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* style tracepoints. The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* tracepoints are treated differently from other tracepoints and there is no bpf hook for them. This patch set adds bpf support for these syscall tracepoints and also adds a test case for it. Changelogs: v3 -> v4: - Check the legality of ctx offset access for syscall tracepoints as well. trace_event_get_offsets will return the correct max offset for each specific syscall tracepoint. - Use a variable length array to avoid hardcoding 6 as the maximum number of arguments beyond syscall_nr. v2 -> v3: - Fix a build issue v1 -> v2: - Do not use TRACE_EVENT_FL_CAP_ANY to identify syscall tracepoints. Instead use trace_event_call->class. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents d226a2b + 1da236b commit 1c14dc4

File tree

6 files changed

+206
-6
lines changed

6 files changed

+206
-6
lines changed

include/linux/syscalls.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,20 @@ extern struct trace_event_functions exit_syscall_print_funcs;
172172
static struct syscall_metadata __used \
173173
__attribute__((section("__syscalls_metadata"))) \
174174
*__p_syscall_meta_##sname = &__syscall_meta_##sname;
175+
176+
static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
177+
{
178+
return tp_event->class == &event_class_syscall_enter ||
179+
tp_event->class == &event_class_syscall_exit;
180+
}
181+
175182
#else
176183
#define SYSCALL_METADATA(sname, nb, ...)
184+
185+
/*
 * Stub on the #else side of the surrounding conditional: here no trace
 * event is ever reported as a syscall tracepoint, so callers always get 0.
 */
static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
{
	return 0;
}
177189
#endif
178190

179191
#define SYSCALL_DEFINE0(sname) \

kernel/events/core.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8050,7 +8050,7 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
80508050

80518051
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
80528052
{
8053-
bool is_kprobe, is_tracepoint;
8053+
bool is_kprobe, is_tracepoint, is_syscall_tp;
80548054
struct bpf_prog *prog;
80558055

80568056
if (event->attr.type != PERF_TYPE_TRACEPOINT)
@@ -8061,7 +8061,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
80618061

80628062
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
80638063
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
8064-
if (!is_kprobe && !is_tracepoint)
8064+
is_syscall_tp = is_syscall_trace_event(event->tp_event);
8065+
if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
80658066
/* bpf programs can only be attached to u/kprobe or tracepoint */
80668067
return -EINVAL;
80678068

@@ -8070,13 +8071,14 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
80708071
return PTR_ERR(prog);
80718072

80728073
if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
8073-
(is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
8074+
(is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
8075+
(is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
80748076
/* valid fd, but invalid bpf program type */
80758077
bpf_prog_put(prog);
80768078
return -EINVAL;
80778079
}
80788080

8079-
if (is_tracepoint) {
8081+
if (is_tracepoint || is_syscall_tp) {
80808082
int off = trace_event_get_offsets(event->tp_event);
80818083

80828084
if (prog->aux->max_ctx_offset > off) {

kernel/trace/trace_syscalls.c

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,11 +559,29 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
559559
static int sys_perf_refcount_enter;
560560
static int sys_perf_refcount_exit;
561561

562+
static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
563+
struct syscall_metadata *sys_data,
564+
struct syscall_trace_enter *rec) {
565+
struct syscall_tp_t {
566+
unsigned long long regs;
567+
unsigned long syscall_nr;
568+
unsigned long args[sys_data->nb_args];
569+
} param;
570+
int i;
571+
572+
*(struct pt_regs **)&param = regs;
573+
param.syscall_nr = rec->nr;
574+
for (i = 0; i < sys_data->nb_args; i++)
575+
param.args[i] = rec->args[i];
576+
return trace_call_bpf(prog, &param);
577+
}
578+
562579
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
563580
{
564581
struct syscall_metadata *sys_data;
565582
struct syscall_trace_enter *rec;
566583
struct hlist_head *head;
584+
struct bpf_prog *prog;
567585
int syscall_nr;
568586
int rctx;
569587
int size;
@@ -578,8 +596,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
578596
if (!sys_data)
579597
return;
580598

599+
prog = READ_ONCE(sys_data->enter_event->prog);
581600
head = this_cpu_ptr(sys_data->enter_event->perf_events);
582-
if (hlist_empty(head))
601+
if (!prog && hlist_empty(head))
583602
return;
584603

585604
/* get the size after alignment with the u32 buffer size field */
@@ -594,6 +613,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
594613
rec->nr = syscall_nr;
595614
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
596615
(unsigned long *)&rec->args);
616+
617+
if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
618+
hlist_empty(head)) {
619+
perf_swevent_put_recursion_context(rctx);
620+
return;
621+
}
622+
597623
perf_trace_buf_submit(rec, size, rctx,
598624
sys_data->enter_event->event.type, 1, regs,
599625
head, NULL);
@@ -633,11 +659,26 @@ static void perf_sysenter_disable(struct trace_event_call *call)
633659
mutex_unlock(&syscall_trace_lock);
634660
}
635661

662+
static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
663+
struct syscall_trace_exit *rec) {
664+
struct syscall_tp_t {
665+
unsigned long long regs;
666+
unsigned long syscall_nr;
667+
unsigned long ret;
668+
} param;
669+
670+
*(struct pt_regs **)&param = regs;
671+
param.syscall_nr = rec->nr;
672+
param.ret = rec->ret;
673+
return trace_call_bpf(prog, &param);
674+
}
675+
636676
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
637677
{
638678
struct syscall_metadata *sys_data;
639679
struct syscall_trace_exit *rec;
640680
struct hlist_head *head;
681+
struct bpf_prog *prog;
641682
int syscall_nr;
642683
int rctx;
643684
int size;
@@ -652,8 +693,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
652693
if (!sys_data)
653694
return;
654695

696+
prog = READ_ONCE(sys_data->exit_event->prog);
655697
head = this_cpu_ptr(sys_data->exit_event->perf_events);
656-
if (hlist_empty(head))
698+
if (!prog && hlist_empty(head))
657699
return;
658700

659701
/* We can probably do that at build time */
@@ -666,6 +708,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
666708

667709
rec->nr = syscall_nr;
668710
rec->ret = syscall_get_return_value(current, regs);
711+
712+
if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
713+
hlist_empty(head)) {
714+
perf_swevent_put_recursion_context(rctx);
715+
return;
716+
}
717+
669718
perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
670719
1, regs, head, NULL);
671720
}

samples/bpf/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example
3939
hostprogs-y += load_sock_ops
4040
hostprogs-y += xdp_redirect
4141
hostprogs-y += xdp_redirect_map
42+
hostprogs-y += syscall_tp
4243

4344
# Libbpf dependencies
4445
LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -82,6 +83,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
8283
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
8384
xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
8485
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
86+
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
8587

8688
# Tell kbuild to always build the programs
8789
always := $(hostprogs-y)
@@ -125,6 +127,7 @@ always += tcp_iw_kern.o
125127
always += tcp_clamp_kern.o
126128
always += xdp_redirect_kern.o
127129
always += xdp_redirect_map_kern.o
130+
always += syscall_tp_kern.o
128131

129132
HOSTCFLAGS += -I$(objtree)/usr/include
130133
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -163,6 +166,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
163166
HOSTLOADLIBES_test_map_in_map += -lelf
164167
HOSTLOADLIBES_xdp_redirect += -lelf
165168
HOSTLOADLIBES_xdp_redirect_map += -lelf
169+
HOSTLOADLIBES_syscall_tp += -lelf
166170

167171
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
168172
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang

samples/bpf/syscall_tp_kern.c

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/* Copyright (c) 2017 Facebook
2+
*
3+
* This program is free software; you can redistribute it and/or
4+
* modify it under the terms of version 2 of the GNU General Public
5+
* License as published by the Free Software Foundation.
6+
*/
7+
#include <uapi/linux/bpf.h>
8+
#include "bpf_helpers.h"
9+
10+
/*
 * Context seen by the sys_enter_open program. The layout follows the
 * context built by perf_call_bpf_enter() in kernel/trace/trace_syscalls.c:
 * an 8-byte leading slot, the syscall number, then the syscall arguments
 * in order.
 */
struct syscalls_enter_open_args {
11+
unsigned long long unused;	/* leading slot (kernel stores the pt_regs pointer there) */
12+
long syscall_nr;		/* syscall number */
13+
long filename_ptr;		/* first syscall argument */
14+
long flags;			/* second syscall argument */
15+
long mode;			/* third syscall argument */
16+
};
17+
18+
/*
 * Context seen by the sys_exit_open program. The layout follows the
 * context built by perf_call_bpf_exit() in kernel/trace/trace_syscalls.c:
 * an 8-byte leading slot, the syscall number, then the return value.
 */
struct syscalls_exit_open_args {
19+
unsigned long long unused;	/* leading slot (kernel stores the pt_regs pointer there) */
20+
long syscall_nr;		/* syscall number */
21+
long ret;			/* syscall return value */
22+
};
23+
24+
/*
 * Single-entry array map (u32 key -> u32 value) bumped by
 * trace_enter_open() through count(); element 0 is the hit counter.
 */
struct bpf_map_def SEC("maps") enter_open_map = {
25+
.type = BPF_MAP_TYPE_ARRAY,
26+
.key_size = sizeof(u32),
27+
.value_size = sizeof(u32),
28+
.max_entries = 1,
29+
};
30+
31+
/*
 * Single-entry array map (u32 key -> u32 value) bumped by
 * trace_enter_exit() through count(); element 0 is the hit counter.
 */
struct bpf_map_def SEC("maps") exit_open_map = {
32+
.type = BPF_MAP_TYPE_ARRAY,
33+
.key_size = sizeof(u32),
34+
.value_size = sizeof(u32),
35+
.max_entries = 1,
36+
};
37+
38+
static __always_inline void count(void *map)
39+
{
40+
u32 key = 0;
41+
u32 *value, init_val = 1;
42+
43+
value = bpf_map_lookup_elem(map, &key);
44+
if (value)
45+
*value += 1;
46+
else
47+
bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST);
48+
}
49+
50+
/* Attached to syscalls/sys_enter_open: bump the counter in enter_open_map. */
SEC("tracepoint/syscalls/sys_enter_open")
int trace_enter_open(struct syscalls_enter_open_args *ctx)
{
	void *map = &enter_open_map;

	count(map);
	return 0;
}
56+
57+
/* Attached to syscalls/sys_exit_open: bump the counter in exit_open_map. */
SEC("tracepoint/syscalls/sys_exit_open")
int trace_enter_exit(struct syscalls_exit_open_args *ctx)
{
	void *map = &exit_open_map;

	count(map);
	return 0;
}

samples/bpf/syscall_tp_user.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/* Copyright (c) 2017 Facebook
2+
*
3+
* This program is free software; you can redistribute it and/or
4+
* modify it under the terms of version 2 of the GNU General Public
5+
* License as published by the Free Software Foundation.
6+
*/
7+
#include <stdio.h>
8+
#include <unistd.h>
9+
#include <fcntl.h>
10+
#include <stdlib.h>
11+
#include <signal.h>
12+
#include <linux/bpf.h>
13+
#include <string.h>
14+
#include <linux/perf_event.h>
15+
#include <errno.h>
16+
#include <assert.h>
17+
#include <stdbool.h>
18+
#include <sys/resource.h>
19+
#include "libbpf.h"
20+
#include "bpf_load.h"
21+
22+
/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
23+
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
24+
*/
25+
26+
static void verify_map(int map_id)
27+
{
28+
__u32 key = 0;
29+
__u32 val;
30+
31+
if (bpf_map_lookup_elem(map_id, &key, &val) != 0) {
32+
fprintf(stderr, "map_lookup failed: %s\n", strerror(errno));
33+
return;
34+
}
35+
if (val == 0)
36+
fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
37+
}
38+
39+
int main(int argc, char **argv)
40+
{
41+
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
42+
char filename[256];
43+
int fd;
44+
45+
setrlimit(RLIMIT_MEMLOCK, &r);
46+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
47+
48+
if (load_bpf_file(filename)) {
49+
fprintf(stderr, "%s", bpf_log_buf);
50+
return 1;
51+
}
52+
53+
/* current load_bpf_file has perf_event_open default pid = -1
54+
* and cpu = 0, which permits attached bpf execution on
55+
* all cpus for all pid's. bpf program execution ignores
56+
* cpu affinity.
57+
*/
58+
/* trigger some "open" operations */
59+
fd = open(filename, O_RDONLY);
60+
if (fd < 0) {
61+
fprintf(stderr, "open failed: %s\n", strerror(errno));
62+
return 1;
63+
}
64+
close(fd);
65+
66+
/* verify the map */
67+
verify_map(map_fd[0]);
68+
verify_map(map_fd[1]);
69+
70+
return 0;
71+
}

0 commit comments

Comments
 (0)