Skip to content

Commit 0fa7fa9

Browse files
xemuldavem330
authored andcommitted
packet: Protect packet sk list with mutex (v2)
Change since v1: * Fixed inuse counters access spotted by Eric In patch eea68e2 (packet: Report socket mclist info via diag module) I've introduced a "scheduling in atomic" problem in packet diag module -- the socket list is traversed under rcu_read_lock() while performed under it sk mclist access requires rtnl lock (i.e. -- mutex) to be taken. [152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002 [152363.820573] 4 locks held by crtools/12517: [152363.820581] #0: (sock_diag_mutex){+.+.+.}, at: [<ffffffff81a2dcb5>] sock_diag_rcv+0x1f/0x3e [152363.820613] #1: (sock_diag_table_mutex){+.+.+.}, at: [<ffffffff81a2de70>] sock_diag_rcv_msg+0xdb/0x11a [152363.820644] #2: (nlk->cb_mutex){+.+.+.}, at: [<ffffffff81a67d01>] netlink_dump+0x23/0x1ab [152363.820693] #3: (rcu_read_lock){.+.+..}, at: [<ffffffff81b6a049>] packet_diag_dump+0x0/0x1af Similar thing was then re-introduced by further packet diag patches (fanount mutex and pgvec mutex for rings) :( Apart from being terribly sorry for the above, I propose to change the packet sk list protection from spinlock to mutex. This lock currently protects two modifications: * sklist * prot inuse counters The sklist modifications can be just reprotected with mutex since they already occur in a sleeping context. The inuse counters modifications are trickier -- the __this_cpu_-s are used inside, thus requiring the caller to handle the potential issues with contexts himself. Since packet sockets' counters are modified in two places only (packet_create and packet_release) we only need to protect the context from being preempted. BH disabling is not required in this case. Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent b32607d commit 0fa7fa9

File tree

3 files changed

+15
-9
lines changed

3 files changed

+15
-9
lines changed

include/net/netns/packet.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include <linux/spinlock.h>
99

1010
struct netns_packet {
11-
spinlock_t sklist_lock;
11+
struct mutex sklist_lock;
1212
struct hlist_head sklist;
1313
};
1414

net/packet/af_packet.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock)
23172317
net = sock_net(sk);
23182318
po = pkt_sk(sk);
23192319

2320-
spin_lock_bh(&net->packet.sklist_lock);
2320+
mutex_lock(&net->packet.sklist_lock);
23212321
sk_del_node_init_rcu(sk);
2322+
mutex_unlock(&net->packet.sklist_lock);
2323+
2324+
preempt_disable();
23222325
sock_prot_inuse_add(net, sk->sk_prot, -1);
2323-
spin_unlock_bh(&net->packet.sklist_lock);
2326+
preempt_enable();
23242327

23252328
spin_lock(&po->bind_lock);
23262329
unregister_prot_hook(sk, false);
@@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
25192522
register_prot_hook(sk);
25202523
}
25212524

2522-
spin_lock_bh(&net->packet.sklist_lock);
2525+
mutex_lock(&net->packet.sklist_lock);
25232526
sk_add_node_rcu(sk, &net->packet.sklist);
2527+
mutex_unlock(&net->packet.sklist_lock);
2528+
2529+
preempt_disable();
25242530
sock_prot_inuse_add(net, &packet_proto, 1);
2525-
spin_unlock_bh(&net->packet.sklist_lock);
2531+
preempt_enable();
25262532

25272533
return 0;
25282534
out:
@@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = {
37753781

37763782
static int __net_init packet_net_init(struct net *net)
37773783
{
3778-
spin_lock_init(&net->packet.sklist_lock);
3784+
mutex_init(&net->packet.sklist_lock);
37793785
INIT_HLIST_HEAD(&net->packet.sklist);
37803786

37813787
if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))

net/packet/diag.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
177177
net = sock_net(skb->sk);
178178
req = nlmsg_data(cb->nlh);
179179

180-
rcu_read_lock();
181-
sk_for_each_rcu(sk, node, &net->packet.sklist) {
180+
mutex_lock(&net->packet.sklist_lock);
181+
sk_for_each(sk, node, &net->packet.sklist) {
182182
if (!net_eq(sock_net(sk), net))
183183
continue;
184184
if (num < s_num)
@@ -192,7 +192,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
192192
num++;
193193
}
194194
done:
195-
rcu_read_unlock();
195+
mutex_unlock(&net->packet.sklist_lock);
196196
cb->args[0] = num;
197197

198198
return skb->len;

0 commit comments

Comments
 (0)