Skip to content

Commit 1cedee1

Browse files
rdna authored and borkmann committed
bpf: Hooks for sys_sendmsg
In addition to the already existing BPF hooks for sys_bind and sys_connect, this patch provides new hooks for sys_sendmsg. It leverages the existing BPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR`, which provides access to the socket itself (properties like family, type, protocol) and the user-passed `struct sockaddr *`, so that a BPF program can override the destination IP and port for system calls such as sendto(2) or sendmsg(2) and/or assign a source IP to the socket. The hooks are implemented as two new attach types: `BPF_CGROUP_UDP4_SENDMSG` and `BPF_CGROUP_UDP6_SENDMSG` for UDPv4 and UDPv6 respectively. UDPv4 and UDPv6 have separate attach types for the same reason as the sys_bind and sys_connect hooks, i.e. to prevent reading from / writing to e.g. the user_ip6 fields when the user passes a sockaddr_in, since that would be out-of-bounds. The difference from the already existing hooks is that the sys_sendmsg hooks are implemented only for unconnected UDP. For TCP it doesn't make sense to change the user-provided `struct sockaddr *` at sendto(2)/sendmsg(2) time, since the socket either was already connected and has its source/destination set, or wasn't connected and the call to sendto(2)/sendmsg(2) would lead to ENOTCONN anyway. Connected UDP is already handled by the sys_connect hooks, which can override the source/destination at connect time and use the fast path later, i.e. these hooks don't affect the UDP fast path. Rewriting the source IP is implemented differently than in the sys_connect hooks. When sys_sendmsg is used with unconnected UDP, it doesn't work to just bind the socket to the desired local IP address, since the source IP can be set on a per-packet basis by using ancillary data (cmsg(3)). So no matter whether the socket is bound or not, the source IP has to be rewritten on every call to sys_sendmsg. To do so, two new fields are added to the UAPI `struct bpf_sock_addr`: * `msg_src_ip4` to set the source IPv4 for UDPv4; * `msg_src_ip6` to set the source IPv6 for UDPv6.
Signed-off-by: Andrey Ignatov <rdna@fb.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
1 parent 13193b0 commit 1cedee1

File tree

8 files changed

+125
-9
lines changed

8 files changed

+125
-9
lines changed

include/linux/bpf-cgroup.h

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
6666

6767
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
6868
struct sockaddr *uaddr,
69-
enum bpf_attach_type type);
69+
enum bpf_attach_type type,
70+
void *t_ctx);
7071

7172
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
7273
struct bpf_sock_ops_kern *sock_ops,
@@ -120,16 +121,18 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
120121
({ \
121122
int __ret = 0; \
122123
if (cgroup_bpf_enabled) \
123-
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
124+
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
125+
NULL); \
124126
__ret; \
125127
})
126128

127-
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
129+
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
128130
({ \
129131
int __ret = 0; \
130132
if (cgroup_bpf_enabled) { \
131133
lock_sock(sk); \
132-
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
134+
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
135+
t_ctx); \
133136
release_sock(sk); \
134137
} \
135138
__ret; \
@@ -151,10 +154,16 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
151154
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
152155

153156
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
154-
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
157+
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)
155158

156159
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
157-
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
160+
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)
161+
162+
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
163+
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)
164+
165+
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
166+
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
158167

159168
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
160169
({ \
@@ -198,6 +207,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
198207
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
199208
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
200209
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
210+
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
211+
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
201212
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
202213
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
203214

include/linux/filter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,7 @@ struct bpf_sock_addr_kern {
10101010
* only two (src and dst) are available at convert_ctx_access time
10111011
*/
10121012
u64 tmp_reg;
1013+
void *t_ctx; /* Attach type specific context. */
10131014
};
10141015

10151016
struct bpf_sock_ops_kern {

include/uapi/linux/bpf.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ enum bpf_attach_type {
160160
BPF_CGROUP_INET6_CONNECT,
161161
BPF_CGROUP_INET4_POST_BIND,
162162
BPF_CGROUP_INET6_POST_BIND,
163+
BPF_CGROUP_UDP4_SENDMSG,
164+
BPF_CGROUP_UDP6_SENDMSG,
163165
__MAX_BPF_ATTACH_TYPE
164166
};
165167

@@ -2363,6 +2365,12 @@ struct bpf_sock_addr {
23632365
__u32 family; /* Allows 4-byte read, but no write */
23642366
__u32 type; /* Allows 4-byte read, but no write */
23652367
__u32 protocol; /* Allows 4-byte read, but no write */
2368+
__u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write.
2369+
* Stored in network byte order.
2370+
*/
2371+
__u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
2372+
* Stored in network byte order.
2373+
*/
23662374
};
23672375

23682376
/* User bpf_sock_ops struct to access socket values and specify request ops

kernel/bpf/cgroup.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
500500
* @sk: sock struct that will use sockaddr
501501
* @uaddr: sockaddr struct provided by user
502502
* @type: The type of program to be executed
503+
* @t_ctx: Pointer to attach type specific context
503504
*
504505
* socket is expected to be of type INET or INET6.
505506
*
@@ -508,12 +509,15 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
508509
*/
509510
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
510511
struct sockaddr *uaddr,
511-
enum bpf_attach_type type)
512+
enum bpf_attach_type type,
513+
void *t_ctx)
512514
{
513515
struct bpf_sock_addr_kern ctx = {
514516
.sk = sk,
515517
.uaddr = uaddr,
518+
.t_ctx = t_ctx,
516519
};
520+
struct sockaddr_storage unspec;
517521
struct cgroup *cgrp;
518522
int ret;
519523

@@ -523,6 +527,11 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
523527
if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
524528
return 0;
525529

530+
if (!ctx.uaddr) {
531+
memset(&unspec, 0, sizeof(unspec));
532+
ctx.uaddr = (struct sockaddr *)&unspec;
533+
}
534+
526535
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
527536
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
528537

kernel/bpf/syscall.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
12491249
case BPF_CGROUP_INET6_BIND:
12501250
case BPF_CGROUP_INET4_CONNECT:
12511251
case BPF_CGROUP_INET6_CONNECT:
1252+
case BPF_CGROUP_UDP4_SENDMSG:
1253+
case BPF_CGROUP_UDP6_SENDMSG:
12521254
return 0;
12531255
default:
12541256
return -EINVAL;
@@ -1565,6 +1567,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
15651567
case BPF_CGROUP_INET6_BIND:
15661568
case BPF_CGROUP_INET4_CONNECT:
15671569
case BPF_CGROUP_INET6_CONNECT:
1570+
case BPF_CGROUP_UDP4_SENDMSG:
1571+
case BPF_CGROUP_UDP6_SENDMSG:
15681572
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
15691573
break;
15701574
case BPF_CGROUP_SOCK_OPS:
@@ -1635,6 +1639,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
16351639
case BPF_CGROUP_INET6_BIND:
16361640
case BPF_CGROUP_INET4_CONNECT:
16371641
case BPF_CGROUP_INET6_CONNECT:
1642+
case BPF_CGROUP_UDP4_SENDMSG:
1643+
case BPF_CGROUP_UDP6_SENDMSG:
16381644
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
16391645
break;
16401646
case BPF_CGROUP_SOCK_OPS:
@@ -1692,6 +1698,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
16921698
case BPF_CGROUP_INET6_POST_BIND:
16931699
case BPF_CGROUP_INET4_CONNECT:
16941700
case BPF_CGROUP_INET6_CONNECT:
1701+
case BPF_CGROUP_UDP4_SENDMSG:
1702+
case BPF_CGROUP_UDP6_SENDMSG:
16951703
case BPF_CGROUP_SOCK_OPS:
16961704
case BPF_CGROUP_DEVICE:
16971705
break;

net/core/filter.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5299,6 +5299,7 @@ static bool sock_addr_is_valid_access(int off, int size,
52995299
switch (prog->expected_attach_type) {
53005300
case BPF_CGROUP_INET4_BIND:
53015301
case BPF_CGROUP_INET4_CONNECT:
5302+
case BPF_CGROUP_UDP4_SENDMSG:
53025303
break;
53035304
default:
53045305
return false;
@@ -5308,6 +5309,24 @@ static bool sock_addr_is_valid_access(int off, int size,
53085309
switch (prog->expected_attach_type) {
53095310
case BPF_CGROUP_INET6_BIND:
53105311
case BPF_CGROUP_INET6_CONNECT:
5312+
case BPF_CGROUP_UDP6_SENDMSG:
5313+
break;
5314+
default:
5315+
return false;
5316+
}
5317+
break;
5318+
case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
5319+
switch (prog->expected_attach_type) {
5320+
case BPF_CGROUP_UDP4_SENDMSG:
5321+
break;
5322+
default:
5323+
return false;
5324+
}
5325+
break;
5326+
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
5327+
msg_src_ip6[3]):
5328+
switch (prog->expected_attach_type) {
5329+
case BPF_CGROUP_UDP6_SENDMSG:
53115330
break;
53125331
default:
53135332
return false;
@@ -5318,6 +5337,9 @@ static bool sock_addr_is_valid_access(int off, int size,
53185337
switch (off) {
53195338
case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
53205339
case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
5340+
case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
5341+
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
5342+
msg_src_ip6[3]):
53215343
/* Only narrow read access allowed for now. */
53225344
if (type == BPF_READ) {
53235345
bpf_ctx_record_field_size(info, size_default);
@@ -6072,6 +6094,23 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
60726094
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
60736095
SK_FL_PROTO_SHIFT);
60746096
break;
6097+
6098+
case offsetof(struct bpf_sock_addr, msg_src_ip4):
6099+
/* Treat t_ctx as struct in_addr for msg_src_ip4. */
6100+
SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
6101+
struct bpf_sock_addr_kern, struct in_addr, t_ctx,
6102+
s_addr, BPF_SIZE(si->code), 0, tmp_reg);
6103+
break;
6104+
6105+
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
6106+
msg_src_ip6[3]):
6107+
off = si->off;
6108+
off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
6109+
/* Treat t_ctx as struct in6_addr for msg_src_ip6. */
6110+
SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
6111+
struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
6112+
s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
6113+
break;
60756114
}
60766115

60776116
return insn - insn_buf;

net/ipv4/udp.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
901901
{
902902
struct inet_sock *inet = inet_sk(sk);
903903
struct udp_sock *up = udp_sk(sk);
904+
DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
904905
struct flowi4 fl4_stack;
905906
struct flowi4 *fl4;
906907
int ulen = len;
@@ -955,8 +956,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
955956
/*
956957
* Get and verify the address.
957958
*/
958-
if (msg->msg_name) {
959-
DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
959+
if (usin) {
960960
if (msg->msg_namelen < sizeof(*usin))
961961
return -EINVAL;
962962
if (usin->sin_family != AF_INET) {
@@ -1010,6 +1010,22 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
10101010
rcu_read_unlock();
10111011
}
10121012

1013+
if (cgroup_bpf_enabled && !connected) {
1014+
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
1015+
(struct sockaddr *)usin, &ipc.addr);
1016+
if (err)
1017+
goto out_free;
1018+
if (usin) {
1019+
if (usin->sin_port == 0) {
1020+
/* BPF program set invalid port. Reject it. */
1021+
err = -EINVAL;
1022+
goto out_free;
1023+
}
1024+
daddr = usin->sin_addr.s_addr;
1025+
dport = usin->sin_port;
1026+
}
1027+
}
1028+
10131029
saddr = ipc.addr;
10141030
ipc.addr = faddr = daddr;
10151031

net/ipv6/udp.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,29 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
13161316
fl6.saddr = np->saddr;
13171317
fl6.fl6_sport = inet->inet_sport;
13181318

1319+
if (cgroup_bpf_enabled && !connected) {
1320+
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
1321+
(struct sockaddr *)sin6, &fl6.saddr);
1322+
if (err)
1323+
goto out_no_dst;
1324+
if (sin6) {
1325+
if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
1326+
/* BPF program rewrote IPv6-only by IPv4-mapped
1327+
* IPv6. It's currently unsupported.
1328+
*/
1329+
err = -ENOTSUPP;
1330+
goto out_no_dst;
1331+
}
1332+
if (sin6->sin6_port == 0) {
1333+
/* BPF program set invalid port. Reject it. */
1334+
err = -EINVAL;
1335+
goto out_no_dst;
1336+
}
1337+
fl6.fl6_dport = sin6->sin6_port;
1338+
fl6.daddr = sin6->sin6_addr;
1339+
}
1340+
}
1341+
13191342
final_p = fl6_update_dst(&fl6, opt, &final);
13201343
if (final_p)
13211344
connected = false;
@@ -1395,6 +1418,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
13951418

13961419
out:
13971420
dst_release(dst);
1421+
out_no_dst:
13981422
fl6_sock_release(flowlabel);
13991423
txopt_put(opt_to_free);
14001424
if (!err)

0 commit comments

Comments
 (0)