Skip to content

Commit eefc06b

Browse files
committed
Merge branch 'bpf-probe-write-user'
Sargun Dhillon says: ==================== bpf: add bpf_probe_write_user helper & example This patch series contains two patches that add support for a probe_write helper to BPF programs. This allows them to manipulate user memory during the course of tracing. The second patch in the series has an example that uses it in one of the intended ways: to divert execution. Thanks to Alexei Starovoitov and Daniel Borkmann for their patience, their review, and for helping me get familiar with the code base. I've made changes based on their recommendations. This helper should be considered for experimental usage and debugging only, so when someone tries to install a proglet that uses it we print a warning to dmesg, along with the command and pid. A follow-up patchset will contain a mechanism to verify the safety of the probe beyond what was done by hand. ---- v1->v2: Restrict writing to user space, as opposed to globally. v2->v3: Fixed formatting issues. v3->v4: Rename copy_to_user -> bpf_probe_write; simplify checking of whether or not it's safe to write; add warnings to dmesg. v4->v5: Raise warning level; clean up location of warning code; make test fail when helper is broken. v5->v6: General formatting cleanup; rename bpf_probe_write -> bpf_probe_write_user. v6->v7: More formatting cleanup; clarified a few comments; clarified log message. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 9b022a6 + cf9b119 commit eefc06b

File tree

6 files changed

+191
-0
lines changed

6 files changed

+191
-0
lines changed

include/uapi/linux/bpf.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,16 @@ enum bpf_func_id {
365365
*/
366366
BPF_FUNC_get_current_task,
367367

368+
/**
369+
* bpf_probe_write_user(void *dst, void *src, int len)
370+
* safely attempt to write to a location
371+
* @dst: destination address in userspace
372+
* @src: source address on stack
373+
* @len: number of bytes to copy
374+
* Return: 0 on success or negative error
375+
*/
376+
BPF_FUNC_probe_write_user,
377+
368378
__BPF_FUNC_MAX_ID,
369379
};
370380

kernel/trace/bpf_trace.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,49 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
8181
.arg3_type = ARG_ANYTHING,
8282
};
8383

84+
/*
 * bpf_probe_write_user - BPF helper that attempts to write to a user address.
 * @r1: destination address, interpreted as a pointer
 * @r2: source pointer
 * @r3: number of bytes to copy, truncated to int
 * @r4: unused
 * @r5: unused
 *
 * Return: -EPERM when the calling context or the destination range is
 * rejected by one of the guards below, otherwise whatever
 * probe_kernel_write() returns.
 */
static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *user_dst = (void *) (long) r1;
	void *from = (void *) (long) r2;
	int len = (int) r3;

	/*
	 * The helper is only allowed to run in user context; it has no
	 * business in interrupt context, in a kthread, or in a task that
	 * is exiting.
	 */
	if (unlikely(in_interrupt()))
		return -EPERM;
	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;

	/*
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary address-limit switch, etc.) it
	 * does not validate enough on its own, so refuse outright while the
	 * address limit is KERNEL_DS.
	 */
	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
		return -EPERM;
	if (!access_ok(VERIFY_WRITE, user_dst, len))
		return -EPERM;

	return probe_kernel_write(user_dst, from, len);
}
109+
110+
static const struct bpf_func_proto bpf_probe_write_user_proto = {
111+
.func = bpf_probe_write_user,
112+
.gpl_only = true,
113+
.ret_type = RET_INTEGER,
114+
.arg1_type = ARG_ANYTHING,
115+
.arg2_type = ARG_PTR_TO_STACK,
116+
.arg3_type = ARG_CONST_STACK_SIZE,
117+
};
118+
119+
static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
120+
{
121+
pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
122+
current->comm, task_pid_nr(current));
123+
124+
return &bpf_probe_write_user_proto;
125+
}
126+
84127
/*
85128
* limited trace_printk()
86129
* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
@@ -362,6 +405,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
362405
return &bpf_get_smp_processor_id_proto;
363406
case BPF_FUNC_perf_event_read:
364407
return &bpf_perf_event_read_proto;
408+
case BPF_FUNC_probe_write_user:
409+
return bpf_get_probe_write_proto();
365410
default:
366411
return NULL;
367412
}

samples/bpf/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ hostprogs-y += tracex3
1414
hostprogs-y += tracex4
1515
hostprogs-y += tracex5
1616
hostprogs-y += tracex6
17+
hostprogs-y += test_probe_write_user
1718
hostprogs-y += trace_output
1819
hostprogs-y += lathist
1920
hostprogs-y += offwaketime
@@ -37,6 +38,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
3738
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
3839
tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
3940
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
41+
test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o
4042
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
4143
lathist-objs := bpf_load.o libbpf.o lathist_user.o
4244
offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
@@ -59,6 +61,7 @@ always += tracex3_kern.o
5961
always += tracex4_kern.o
6062
always += tracex5_kern.o
6163
always += tracex6_kern.o
64+
always += test_probe_write_user_kern.o
6265
always += trace_output_kern.o
6366
always += tcbpf1_kern.o
6467
always += lathist_kern.o
@@ -85,6 +88,7 @@ HOSTLOADLIBES_tracex3 += -lelf
8588
HOSTLOADLIBES_tracex4 += -lelf -lrt
8689
HOSTLOADLIBES_tracex5 += -lelf
8790
HOSTLOADLIBES_tracex6 += -lelf
91+
HOSTLOADLIBES_test_probe_write_user += -lelf
8892
HOSTLOADLIBES_trace_output += -lelf -lrt
8993
HOSTLOADLIBES_lathist += -lelf
9094
HOSTLOADLIBES_offwaketime += -lelf

samples/bpf/bpf_helpers.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data,
4141
(void *) BPF_FUNC_perf_event_output;
4242
static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
4343
(void *) BPF_FUNC_get_stackid;
44+
/*
 * Helper stub bound to BPF_FUNC_probe_write_user: attempt to copy size
 * bytes from src to the user-space address dst.
 */
static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
	(void *) BPF_FUNC_probe_write_user;
4446

4547
/* llvm builtin functions that eBPF C program may use to
4648
* emit BPF_LD_ABS and BPF_LD_IND instructions
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
2+
*
3+
* This program is free software; you can redistribute it and/or
4+
* modify it under the terms of version 2 of the GNU General Public
5+
* License as published by the Free Software Foundation.
6+
*/
7+
#include <linux/skbuff.h>
8+
#include <linux/netdevice.h>
9+
#include <uapi/linux/bpf.h>
10+
#include <linux/version.h>
11+
#include "bpf_helpers.h"
12+
13+
/*
 * Hash map consulted by the connect() probe: the key is the sockaddr_in
 * the caller asked for, the value is the sockaddr_in to substitute.
 * Holds at most 256 entries.
 */
struct bpf_map_def SEC("maps") dnat_map = {
	.type		= BPF_MAP_TYPE_HASH,
	.key_size	= sizeof(struct sockaddr_in),
	.value_size	= sizeof(struct sockaddr_in),
	.max_entries	= 256,
};
19+
20+
/* kprobe is NOT a stable ABI
21+
* kernel functions can be removed, renamed or completely change semantics.
22+
* Number of arguments and their positions can change, etc.
23+
* In such case this bpf+kprobe example will no longer be meaningful
24+
*
25+
* This example sits on a syscall, and the syscall ABI is relatively stable
26+
* of course, across platforms, and over time, the ABI may change.
27+
*/
28+
SEC("kprobe/sys_connect")
29+
int bpf_prog1(struct pt_regs *ctx)
30+
{
31+
struct sockaddr_in new_addr, orig_addr = {};
32+
struct sockaddr_in *mapped_addr;
33+
void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx);
34+
int sockaddr_len = (int)PT_REGS_PARM3(ctx);
35+
36+
if (sockaddr_len > sizeof(orig_addr))
37+
return 0;
38+
39+
if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
40+
return 0;
41+
42+
mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
43+
if (mapped_addr != NULL) {
44+
memcpy(&new_addr, mapped_addr, sizeof(new_addr));
45+
bpf_probe_write_user(sockaddr_arg, &new_addr,
46+
sizeof(new_addr));
47+
}
48+
return 0;
49+
}
50+
51+
char _license[] SEC("license") = "GPL";
52+
u32 _version SEC("version") = LINUX_VERSION_CODE;
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#include <stdio.h>
2+
#include <assert.h>
3+
#include <linux/bpf.h>
4+
#include <unistd.h>
5+
#include "libbpf.h"
6+
#include "bpf_load.h"
7+
#include <sys/socket.h>
8+
#include <string.h>
9+
#include <netinet/in.h>
10+
#include <arpa/inet.h>
11+
12+
int main(int ac, char **argv)
13+
{
14+
int serverfd, serverconnfd, clientfd;
15+
socklen_t sockaddr_len;
16+
struct sockaddr serv_addr, mapped_addr, tmp_addr;
17+
struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
18+
char filename[256];
19+
char *ip;
20+
21+
serv_addr_in = (struct sockaddr_in *)&serv_addr;
22+
mapped_addr_in = (struct sockaddr_in *)&mapped_addr;
23+
tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
24+
25+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
26+
27+
if (load_bpf_file(filename)) {
28+
printf("%s", bpf_log_buf);
29+
return 1;
30+
}
31+
32+
assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
33+
assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
34+
35+
/* Bind server to ephemeral port on lo */
36+
memset(&serv_addr, 0, sizeof(serv_addr));
37+
serv_addr_in->sin_family = AF_INET;
38+
serv_addr_in->sin_port = 0;
39+
serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
40+
41+
assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0);
42+
43+
sockaddr_len = sizeof(serv_addr);
44+
assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0);
45+
ip = inet_ntoa(serv_addr_in->sin_addr);
46+
printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port));
47+
48+
memset(&mapped_addr, 0, sizeof(mapped_addr));
49+
mapped_addr_in->sin_family = AF_INET;
50+
mapped_addr_in->sin_port = htons(5555);
51+
mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
52+
53+
assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
54+
55+
assert(listen(serverfd, 5) == 0);
56+
57+
ip = inet_ntoa(mapped_addr_in->sin_addr);
58+
printf("Client connecting to: %s:%d\n",
59+
ip, ntohs(mapped_addr_in->sin_port));
60+
assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0);
61+
62+
sockaddr_len = sizeof(tmp_addr);
63+
ip = inet_ntoa(tmp_addr_in->sin_addr);
64+
assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0);
65+
printf("Server received connection from: %s:%d\n",
66+
ip, ntohs(tmp_addr_in->sin_port));
67+
68+
sockaddr_len = sizeof(tmp_addr);
69+
assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0);
70+
ip = inet_ntoa(tmp_addr_in->sin_addr);
71+
printf("Client's peer address: %s:%d\n",
72+
ip, ntohs(tmp_addr_in->sin_port));
73+
74+
/* Is the server's getsockname = the socket getpeername */
75+
assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
76+
77+
return 0;
78+
}

0 commit comments

Comments
 (0)