Skip to content

Commit eb4cb00

Browse files
kraigatgoog authored and davem330 committed
sock_diag: define destruction multicast groups
These groups will contain socket-destruction events for AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP. Near the end of socket destruction, a check for listeners is performed. In the presence of a listener, rather than completely cleaning up the socket, a unit of work is added to a private work queue, which first broadcasts information about the socket and then finishes the cleanup operation. Signed-off-by: Craig Gallek <kraig@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 916035d commit eb4cb00

File tree

5 files changed

+148
-1
lines changed

5 files changed

+148
-1
lines changed

include/linux/sock_diag.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#ifndef __SOCK_DIAG_H__
22
#define __SOCK_DIAG_H__
33

4+
#include <linux/netlink.h>
45
#include <linux/user_namespace.h>
6+
#include <net/net_namespace.h>
7+
#include <net/sock.h>
58
#include <uapi/linux/sock_diag.h>
69

710
struct sk_buff;
@@ -11,6 +14,7 @@ struct sock;
1114
struct sock_diag_handler {
1215
__u8 family;
1316
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
17+
int (*get_info)(struct sk_buff *skb, struct sock *sk);
1418
};
1519

1620
int sock_diag_register(const struct sock_diag_handler *h);
@@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
2630
int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
2731
struct sk_buff *skb, int attrtype);
2832

33+
static inline
34+
enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
35+
{
36+
switch (sk->sk_family) {
37+
case AF_INET:
38+
switch (sk->sk_protocol) {
39+
case IPPROTO_TCP:
40+
return SKNLGRP_INET_TCP_DESTROY;
41+
case IPPROTO_UDP:
42+
return SKNLGRP_INET_UDP_DESTROY;
43+
default:
44+
return SKNLGRP_NONE;
45+
}
46+
case AF_INET6:
47+
switch (sk->sk_protocol) {
48+
case IPPROTO_TCP:
49+
return SKNLGRP_INET6_TCP_DESTROY;
50+
case IPPROTO_UDP:
51+
return SKNLGRP_INET6_UDP_DESTROY;
52+
default:
53+
return SKNLGRP_NONE;
54+
}
55+
default:
56+
return SKNLGRP_NONE;
57+
}
58+
}
59+
60+
static inline
61+
bool sock_diag_has_destroy_listeners(const struct sock *sk)
62+
{
63+
const struct net *n = sock_net(sk);
64+
const enum sknetlink_groups group = sock_diag_destroy_group(sk);
65+
66+
return group != SKNLGRP_NONE && n->diag_nlsk &&
67+
netlink_has_listeners(n->diag_nlsk, group);
68+
}
69+
void sock_diag_broadcast_destroy(struct sock *sk);
70+
2971
#endif

include/net/sock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
15181518
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
15191519
struct proto *prot, int kern);
15201520
void sk_free(struct sock *sk);
1521+
void sk_destruct(struct sock *sk);
15211522
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
15221523

15231524
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,

include/uapi/linux/sock_diag.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,14 @@ enum {
2323
SK_MEMINFO_VARS,
2424
};
2525

26+
/*
 * Multicast groups of the sock_diag netlink family.
 *
 * Each *_DESTROY group carries socket-destruction events for one
 * (address family, protocol) pair.
 */
enum sknetlink_groups {
	SKNLGRP_NONE = 0,		/* not a group: "no destroy events" */
	SKNLGRP_INET_TCP_DESTROY,	/* AF_INET,  IPPROTO_TCP */
	SKNLGRP_INET_UDP_DESTROY,	/* AF_INET,  IPPROTO_UDP */
	SKNLGRP_INET6_TCP_DESTROY,	/* AF_INET6, IPPROTO_TCP */
	SKNLGRP_INET6_UDP_DESTROY,	/* AF_INET6, IPPROTO_UDP */
	__SKNLGRP_MAX,			/* sentinel, keep last */
};
/* Highest valid group number. */
#define SKNLGRP_MAX	(__SKNLGRP_MAX - 1)
35+
2636
#endif /* _UAPI__SOCK_DIAG_H__ */

net/core/sock.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@
131131
#include <linux/ipsec.h>
132132
#include <net/cls_cgroup.h>
133133
#include <net/netprio_cgroup.h>
134+
#include <linux/sock_diag.h>
134135

135136
#include <linux/filter.h>
136137

@@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
14231424
}
14241425
EXPORT_SYMBOL(sk_alloc);
14251426

1426-
static void __sk_free(struct sock *sk)
1427+
void sk_destruct(struct sock *sk)
14271428
{
14281429
struct sk_filter *filter;
14291430

@@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk)
14511452
sk_prot_free(sk->sk_prot_creator, sk);
14521453
}
14531454

1455+
/*
 * Final release of a socket.
 *
 * Normally the socket is destroyed right away. When someone is listening
 * for destruction events, destruction is handed to
 * sock_diag_broadcast_destroy() instead, which takes over the
 * responsibility of calling sk_destruct().
 */
static void __sk_free(struct sock *sk)
{
	if (likely(!sock_diag_has_destroy_listeners(sk))) {
		sk_destruct(sk);
		return;
	}

	sock_diag_broadcast_destroy(sk);
}
1462+
14541463
void sk_free(struct sock *sk)
14551464
{
14561465
/*

net/core/sock_diag.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,17 @@
55
#include <net/net_namespace.h>
66
#include <linux/module.h>
77
#include <net/sock.h>
8+
#include <linux/kernel.h>
9+
#include <linux/tcp.h>
10+
#include <linux/workqueue.h>
811

912
#include <linux/inet_diag.h>
1013
#include <linux/sock_diag.h>
1114

1215
static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
1316
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
1417
static DEFINE_MUTEX(sock_diag_table_mutex);
18+
static struct workqueue_struct *broadcast_wq;
1519

1620
static u64 sock_gen_cookie(struct sock *sk)
1721
{
@@ -101,6 +105,62 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
101105
}
102106
EXPORT_SYMBOL(sock_diag_put_filterinfo);
103107

108+
struct broadcast_sk {
109+
struct sock *sk;
110+
struct work_struct work;
111+
};
112+
113+
static size_t sock_diag_nlmsg_size(void)
114+
{
115+
return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
116+
+ nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
117+
+ nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
118+
}
119+
120+
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
121+
{
122+
struct broadcast_sk *bsk =
123+
container_of(work, struct broadcast_sk, work);
124+
struct sock *sk = bsk->sk;
125+
const struct sock_diag_handler *hndl;
126+
struct sk_buff *skb;
127+
const enum sknetlink_groups group = sock_diag_destroy_group(sk);
128+
int err = -1;
129+
130+
WARN_ON(group == SKNLGRP_NONE);
131+
132+
skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
133+
if (!skb)
134+
goto out;
135+
136+
mutex_lock(&sock_diag_table_mutex);
137+
hndl = sock_diag_handlers[sk->sk_family];
138+
if (hndl && hndl->get_info)
139+
err = hndl->get_info(skb, sk);
140+
mutex_unlock(&sock_diag_table_mutex);
141+
142+
if (!err)
143+
nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
144+
GFP_KERNEL);
145+
else
146+
kfree_skb(skb);
147+
out:
148+
sk_destruct(sk);
149+
kfree(bsk);
150+
}
151+
152+
void sock_diag_broadcast_destroy(struct sock *sk)
153+
{
154+
/* Note, this function is often called from an interrupt context. */
155+
struct broadcast_sk *bsk =
156+
kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC);
157+
if (!bsk)
158+
return sk_destruct(sk);
159+
bsk->sk = sk;
160+
INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
161+
queue_work(broadcast_wq, &bsk->work);
162+
}
163+
104164
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
105165
{
106166
mutex_lock(&sock_diag_table_mutex);
@@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb)
211271
mutex_unlock(&sock_diag_mutex);
212272
}
213273

274+
static int sock_diag_bind(struct net *net, int group)
275+
{
276+
switch (group) {
277+
case SKNLGRP_INET_TCP_DESTROY:
278+
case SKNLGRP_INET_UDP_DESTROY:
279+
if (!sock_diag_handlers[AF_INET])
280+
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
281+
NETLINK_SOCK_DIAG, AF_INET);
282+
break;
283+
case SKNLGRP_INET6_TCP_DESTROY:
284+
case SKNLGRP_INET6_UDP_DESTROY:
285+
if (!sock_diag_handlers[AF_INET6])
286+
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
287+
NETLINK_SOCK_DIAG, AF_INET);
288+
break;
289+
}
290+
return 0;
291+
}
292+
214293
static int __net_init diag_net_init(struct net *net)
215294
{
216295
struct netlink_kernel_cfg cfg = {
296+
.groups = SKNLGRP_MAX,
217297
.input = sock_diag_rcv,
298+
.bind = sock_diag_bind,
299+
.flags = NL_CFG_F_NONROOT_RECV,
218300
};
219301

220302
net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
@@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = {
234316

235317
/*
 * Set up sock_diag: create the workqueue used for destroy broadcasts and
 * register the per-namespace operations.
 *
 * Returns 0 on success, -ENOMEM if the workqueue cannot be allocated, or
 * the error from register_pernet_subsys(). On failure nothing is left
 * allocated.
 */
static int __init sock_diag_init(void)
{
	int err;

	broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
	if (!broadcast_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&diag_net_ops);
	if (err) {
		/* Do not leak the workqueue when registration fails. */
		destroy_workqueue(broadcast_wq);
		broadcast_wq = NULL;
	}

	return err;
}
239323

240324
/*
 * Tear down sock_diag in the reverse order of sock_diag_init(): unregister
 * the per-namespace operations first, then destroy the broadcast workqueue.
 */
static void __exit sock_diag_exit(void)
{
	unregister_pernet_subsys(&diag_net_ops);

	destroy_workqueue(broadcast_wq);
}
244329

245330
module_init(sock_diag_init);

0 commit comments

Comments
 (0)