Skip to content

Commit fad0c40

Browse files
committed
Merge branch 'bpf-sockmap-estab-fixes'
John Fastabend says: ==================== Eric noted that using the close callback is not sufficient to catch all transitions from ESTABLISHED state to a LISTEN state. So this series does two things. First, only allow adding socks in ESTABLISH state and second use unhash callback to catch tcp_disconnect() transitions. v2: added check for ESTABLISH state in hash update sockmap as well v3: Do not release lock from unhash in error path, no lock was used in the first place. And drop not so useful code comments v4: convert, if (unhash()) return unhash(); return to if (unhash()) unhash(); return; Thanks for reviewing Yonghong I carried your ACKs forward. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2 parents 080220b + 5028027 commit fad0c40

File tree

2 files changed

+78
-23
lines changed

2 files changed

+78
-23
lines changed

kernel/bpf/sockmap.c

Lines changed: 71 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ struct smap_psock {
132132
struct work_struct gc_work;
133133

134134
struct proto *sk_proto;
135+
void (*save_unhash)(struct sock *sk);
135136
void (*save_close)(struct sock *sk, long timeout);
136137
void (*save_data_ready)(struct sock *sk);
137138
void (*save_write_space)(struct sock *sk);
@@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
143144
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
144145
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
145146
int offset, size_t size, int flags);
147+
static void bpf_tcp_unhash(struct sock *sk);
146148
static void bpf_tcp_close(struct sock *sk, long timeout);
147149

148150
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
@@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
184186
struct proto *base)
185187
{
186188
prot[SOCKMAP_BASE] = *base;
189+
prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash;
187190
prot[SOCKMAP_BASE].close = bpf_tcp_close;
188191
prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
189192
prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
@@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk)
217220
return -EBUSY;
218221
}
219222

223+
psock->save_unhash = sk->sk_prot->unhash;
220224
psock->save_close = sk->sk_prot->close;
221225
psock->sk_proto = sk->sk_prot;
222226

@@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
305309
return e;
306310
}
307311

308-
static void bpf_tcp_close(struct sock *sk, long timeout)
312+
static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
309313
{
310-
void (*close_fun)(struct sock *sk, long timeout);
311314
struct smap_psock_map_entry *e;
312315
struct sk_msg_buff *md, *mtmp;
313-
struct smap_psock *psock;
314316
struct sock *osk;
315317

316-
lock_sock(sk);
317-
rcu_read_lock();
318-
psock = smap_psock_sk(sk);
319-
if (unlikely(!psock)) {
320-
rcu_read_unlock();
321-
release_sock(sk);
322-
return sk->sk_prot->close(sk, timeout);
323-
}
324-
325-
/* The psock may be destroyed anytime after exiting the RCU critial
326-
* section so by the time we use close_fun the psock may no longer
327-
* be valid. However, bpf_tcp_close is called with the sock lock
328-
* held so the close hook and sk are still valid.
329-
*/
330-
close_fun = psock->save_close;
331-
332318
if (psock->cork) {
333319
free_start_sg(psock->sock, psock->cork, true);
334320
kfree(psock->cork);
@@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
379365
kfree(e);
380366
e = psock_map_pop(sk, psock);
381367
}
368+
}
369+
370+
static void bpf_tcp_unhash(struct sock *sk)
371+
{
372+
void (*unhash_fun)(struct sock *sk);
373+
struct smap_psock *psock;
374+
375+
rcu_read_lock();
376+
psock = smap_psock_sk(sk);
377+
if (unlikely(!psock)) {
378+
rcu_read_unlock();
379+
if (sk->sk_prot->unhash)
380+
sk->sk_prot->unhash(sk);
381+
return;
382+
}
383+
unhash_fun = psock->save_unhash;
384+
bpf_tcp_remove(sk, psock);
385+
rcu_read_unlock();
386+
unhash_fun(sk);
387+
}
388+
389+
static void bpf_tcp_close(struct sock *sk, long timeout)
390+
{
391+
void (*close_fun)(struct sock *sk, long timeout);
392+
struct smap_psock *psock;
393+
394+
lock_sock(sk);
395+
rcu_read_lock();
396+
psock = smap_psock_sk(sk);
397+
if (unlikely(!psock)) {
398+
rcu_read_unlock();
399+
release_sock(sk);
400+
return sk->sk_prot->close(sk, timeout);
401+
}
402+
close_fun = psock->save_close;
403+
bpf_tcp_remove(sk, psock);
382404
rcu_read_unlock();
383405
release_sock(sk);
384406
close_fun(sk, timeout);
@@ -2097,8 +2119,12 @@ static int sock_map_update_elem(struct bpf_map *map,
20972119
return -EINVAL;
20982120
}
20992121

2122+
/* ULPs are currently supported only for TCP sockets in ESTABLISHED
2123+
* state.
2124+
*/
21002125
if (skops.sk->sk_type != SOCK_STREAM ||
2101-
skops.sk->sk_protocol != IPPROTO_TCP) {
2126+
skops.sk->sk_protocol != IPPROTO_TCP ||
2127+
skops.sk->sk_state != TCP_ESTABLISHED) {
21022128
fput(socket->file);
21032129
return -EOPNOTSUPP;
21042130
}
@@ -2453,6 +2479,16 @@ static int sock_hash_update_elem(struct bpf_map *map,
24532479
return -EINVAL;
24542480
}
24552481

2482+
/* ULPs are currently supported only for TCP sockets in ESTABLISHED
2483+
* state.
2484+
*/
2485+
if (skops.sk->sk_type != SOCK_STREAM ||
2486+
skops.sk->sk_protocol != IPPROTO_TCP ||
2487+
skops.sk->sk_state != TCP_ESTABLISHED) {
2488+
fput(socket->file);
2489+
return -EOPNOTSUPP;
2490+
}
2491+
24562492
lock_sock(skops.sk);
24572493
preempt_disable();
24582494
rcu_read_lock();
@@ -2543,10 +2579,22 @@ const struct bpf_map_ops sock_hash_ops = {
25432579
.map_check_btf = map_check_no_btf,
25442580
};
25452581

2582+
static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops)
2583+
{
2584+
return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
2585+
ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
2586+
}
25462587
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
25472588
struct bpf_map *, map, void *, key, u64, flags)
25482589
{
25492590
WARN_ON_ONCE(!rcu_read_lock_held());
2591+
2592+
/* ULPs are currently supported only for TCP sockets in ESTABLISHED
2593+
* state. This checks that the sock ops triggering the update is
2594+
* one indicating we are (or will be soon) in an ESTABLISHED state.
2595+
*/
2596+
if (!bpf_is_valid_sock_op(bpf_sock))
2597+
return -EOPNOTSUPP;
25502598
return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
25512599
}
25522600

@@ -2565,6 +2613,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
25652613
struct bpf_map *, map, void *, key, u64, flags)
25662614
{
25672615
WARN_ON_ONCE(!rcu_read_lock_held());
2616+
2617+
if (!bpf_is_valid_sock_op(bpf_sock))
2618+
return -EOPNOTSUPP;
25682619
return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
25692620
}
25702621

tools/testing/selftests/bpf/test_maps.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,11 @@ static void test_sockmap(int tasks, void *data)
580580
/* Test update without programs */
581581
for (i = 0; i < 6; i++) {
582582
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
583-
if (err) {
583+
if (i < 2 && !err) {
584+
printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
585+
i, sfd[i]);
586+
goto out_sockmap;
587+
} else if (i >= 2 && err) {
584588
printf("Failed noprog update sockmap '%i:%i'\n",
585589
i, sfd[i]);
586590
goto out_sockmap;
@@ -741,7 +745,7 @@ static void test_sockmap(int tasks, void *data)
741745
}
742746

743747
/* Test map update elem afterwards fd lives in fd and map_fd */
744-
for (i = 0; i < 6; i++) {
748+
for (i = 2; i < 6; i++) {
745749
err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
746750
if (err) {
747751
printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
@@ -845,7 +849,7 @@ static void test_sockmap(int tasks, void *data)
845849
}
846850

847851
/* Delete the elems without programs */
848-
for (i = 0; i < 6; i++) {
852+
for (i = 2; i < 6; i++) {
849853
err = bpf_map_delete_elem(fd, &i);
850854
if (err) {
851855
printf("Failed delete sockmap %i '%i:%i'\n",

0 commit comments

Comments
 (0)