Skip to content

Commit cd40b7d

Browse files
dlunev authored and
davem330 committed
[NET]: make netlink user -> kernel interface synchronous
This patch makes the processing of netlink user -> kernel messages synchronous. This change was inspired by a talk with Alexey Kuznetsov about the current netlink message processing. He says that he was badly wrong when he introduced asynchronous user -> kernel communication. The call to netlink_unicast is the only path for sending a message to the kernel netlink socket. But, unfortunately, it is also used to send data to the user. Before this change, the user message was attached to the socket queue and sk->sk_data_ready was called. The process was blocked until all pending messages were processed. The bad thing is that this processing may occur in an arbitrary process context. This patch changes the nlk->data_ready callback to take one skb and forces packet processing right in netlink_unicast. The kernel -> user path in netlink_unicast remains untouched. EINTR processing in netlink_run_queue was changed. It forces an rtnl_lock drop, but the process remains in the loop until the message is fully processed. So, there is no need to use these kludges now. Signed-off-by: Denis V. Lunev <den@openvz.org> Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent aed8156 commit cd40b7d

File tree

18 files changed

+130
-299
lines changed

18 files changed

+130
-299
lines changed

drivers/connector/connector.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -234,18 +234,6 @@ static void cn_rx_skb(struct sk_buff *__skb)
234234
kfree_skb(__skb);
235235
}
236236

237-
/*
238-
* Netlink socket input callback - dequeues the skbs and calls the
239-
* main netlink receiving function.
240-
*/
241-
static void cn_input(struct sock *sk, int len)
242-
{
243-
struct sk_buff *skb;
244-
245-
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
246-
cn_rx_skb(skb);
247-
}
248-
249237
/*
250238
* Notification routing.
251239
*
@@ -442,7 +430,7 @@ static int __devinit cn_init(void)
442430
struct cn_dev *dev = &cdev;
443431
int err;
444432

445-
dev->input = cn_input;
433+
dev->input = cn_rx_skb;
446434
dev->id.idx = cn_idx;
447435
dev->id.val = cn_val;
448436

drivers/scsi/scsi_netlink.c

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
6464

6565
if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) {
6666
err = -EBADMSG;
67-
goto next_msg;
67+
return;
6868
}
6969

7070
hdr = NLMSG_DATA(nlh);
@@ -98,27 +98,6 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
9898
}
9999

100100

101-
/**
102-
* scsi_nl_rcv_msg -
103-
* Receive handler for a socket. Extracts a received message buffer from
104-
* the socket, and starts message processing.
105-
*
106-
* @sk: socket
107-
* @len: unused
108-
*
109-
**/
110-
static void
111-
scsi_nl_rcv(struct sock *sk, int len)
112-
{
113-
struct sk_buff *skb;
114-
115-
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
116-
scsi_nl_rcv_msg(skb);
117-
kfree_skb(skb);
118-
}
119-
}
120-
121-
122101
/**
123102
* scsi_nl_rcv_event -
124103
* Event handler for a netlink socket.
@@ -168,7 +147,7 @@ scsi_netlink_init(void)
168147
}
169148

170149
scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT,
171-
SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
150+
SCSI_NL_GRP_CNT, scsi_nl_rcv_msg, NULL,
172151
THIS_MODULE);
173152
if (!scsi_nl_sock) {
174153
printk(KERN_ERR "%s: register of recieve handler failed\n",

drivers/scsi/scsi_transport_iscsi.c

Lines changed: 35 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,61 +1097,49 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
10971097
}
10981098

10991099
/*
1100-
* Get message from skb (based on rtnetlink_rcv_skb). Each message is
1101-
* processed by iscsi_if_recv_msg. Malformed skbs with wrong lengths or
1102-
* invalid creds are discarded silently.
1100+
* Get message from skb. Each message is processed by iscsi_if_recv_msg.
1101+
* Malformed skbs with wrong lengths or invalid creds are not processed.
11031102
*/
11041103
static void
1105-
iscsi_if_rx(struct sock *sk, int len)
1104+
iscsi_if_rx(struct sk_buff *skb)
11061105
{
1107-
struct sk_buff *skb;
1108-
11091106
mutex_lock(&rx_queue_mutex);
1110-
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1111-
if (NETLINK_CREDS(skb)->uid) {
1112-
skb_pull(skb, skb->len);
1113-
goto free_skb;
1107+
while (skb->len >= NLMSG_SPACE(0)) {
1108+
int err;
1109+
uint32_t rlen;
1110+
struct nlmsghdr *nlh;
1111+
struct iscsi_uevent *ev;
1112+
1113+
nlh = nlmsg_hdr(skb);
1114+
if (nlh->nlmsg_len < sizeof(*nlh) ||
1115+
skb->len < nlh->nlmsg_len) {
1116+
break;
11141117
}
11151118

1116-
while (skb->len >= NLMSG_SPACE(0)) {
1117-
int err;
1118-
uint32_t rlen;
1119-
struct nlmsghdr *nlh;
1120-
struct iscsi_uevent *ev;
1119+
ev = NLMSG_DATA(nlh);
1120+
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1121+
if (rlen > skb->len)
1122+
rlen = skb->len;
11211123

1122-
nlh = nlmsg_hdr(skb);
1123-
if (nlh->nlmsg_len < sizeof(*nlh) ||
1124-
skb->len < nlh->nlmsg_len) {
1125-
break;
1126-
}
1127-
1128-
ev = NLMSG_DATA(nlh);
1129-
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1130-
if (rlen > skb->len)
1131-
rlen = skb->len;
1132-
1133-
err = iscsi_if_recv_msg(skb, nlh);
1134-
if (err) {
1135-
ev->type = ISCSI_KEVENT_IF_ERROR;
1136-
ev->iferror = err;
1137-
}
1138-
do {
1139-
/*
1140-
* special case for GET_STATS:
1141-
* on success - sending reply and stats from
1142-
* inside of if_recv_msg(),
1143-
* on error - fall through.
1144-
*/
1145-
if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
1146-
break;
1147-
err = iscsi_if_send_reply(
1148-
NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
1149-
nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
1150-
} while (err < 0 && err != -ECONNREFUSED);
1151-
skb_pull(skb, rlen);
1124+
err = iscsi_if_recv_msg(skb, nlh);
1125+
if (err) {
1126+
ev->type = ISCSI_KEVENT_IF_ERROR;
1127+
ev->iferror = err;
11521128
}
1153-
free_skb:
1154-
kfree_skb(skb);
1129+
do {
1130+
/*
1131+
* special case for GET_STATS:
1132+
* on success - sending reply and stats from
1133+
* inside of if_recv_msg(),
1134+
* on error - fall through.
1135+
*/
1136+
if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
1137+
break;
1138+
err = iscsi_if_send_reply(
1139+
NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
1140+
nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
1141+
} while (err < 0 && err != -ECONNREFUSED);
1142+
skb_pull(skb, rlen);
11551143
}
11561144
mutex_unlock(&rx_queue_mutex);
11571145
}

fs/ecryptfs/netlink.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -165,22 +165,10 @@ static int ecryptfs_process_nl_quit(struct sk_buff *skb)
165165
* it to its desired netlink context element and wake up the process
166166
* that is waiting for a response.
167167
*/
168-
static void ecryptfs_receive_nl_message(struct sock *sk, int len)
168+
static void ecryptfs_receive_nl_message(struct sk_buff *skb)
169169
{
170-
struct sk_buff *skb;
171170
struct nlmsghdr *nlh;
172-
int rc = 0; /* skb_recv_datagram requires this */
173171

174-
receive:
175-
skb = skb_recv_datagram(sk, 0, 0, &rc);
176-
if (rc == -EINTR)
177-
goto receive;
178-
else if (rc < 0) {
179-
ecryptfs_printk(KERN_ERR, "Error occurred while "
180-
"receiving eCryptfs netlink message; "
181-
"rc = [%d]\n", rc);
182-
return;
183-
}
184172
nlh = nlmsg_hdr(skb);
185173
if (!NLMSG_OK(nlh, skb->len)) {
186174
ecryptfs_printk(KERN_ERR, "Received corrupt netlink "

include/linux/connector.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ struct cn_dev {
153153

154154
u32 seq, groups;
155155
struct sock *nls;
156-
void (*input) (struct sock * sk, int len);
156+
void (*input) (struct sk_buff *skb);
157157

158158
struct cn_queue_dev *cbdev;
159159
};

include/linux/netlink.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ struct netlink_skb_parms
175175

176176
extern struct sock *netlink_kernel_create(struct net *net,
177177
int unit,unsigned int groups,
178-
void (*input)(struct sock *sk, int len),
178+
void (*input)(struct sk_buff *skb),
179179
struct mutex *cb_mutex,
180180
struct module *module);
181181
extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);

include/net/netlink.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,9 @@ struct nl_info {
220220
u32 pid;
221221
};
222222

223-
extern unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen,
224-
int (*cb)(struct sk_buff *,
225-
struct nlmsghdr *));
223+
extern int netlink_rcv_skb(struct sk_buff *skb,
224+
int (*cb)(struct sk_buff *,
225+
struct nlmsghdr *));
226226
extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb,
227227
u32 pid, unsigned int group, int report,
228228
gfp_t flags);

kernel/audit.c

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -847,18 +847,10 @@ static void audit_receive_skb(struct sk_buff *skb)
847847
}
848848

849849
/* Receive messages from netlink socket. */
850-
static void audit_receive(struct sock *sk, int length)
850+
static void audit_receive(struct sk_buff *skb)
851851
{
852-
struct sk_buff *skb;
853-
unsigned int qlen;
854-
855852
mutex_lock(&audit_cmd_mutex);
856-
857-
for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
858-
skb = skb_dequeue(&sk->sk_receive_queue);
859-
audit_receive_skb(skb);
860-
kfree_skb(skb);
861-
}
853+
audit_receive_skb(skb);
862854
mutex_unlock(&audit_cmd_mutex);
863855
}
864856

net/core/rtnetlink.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,15 +1312,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
13121312
return doit(skb, nlh, (void *)&rta_buf[0]);
13131313
}
13141314

1315-
static void rtnetlink_rcv(struct sock *sk, int len)
1315+
static void rtnetlink_rcv(struct sk_buff *skb)
13161316
{
1317-
unsigned int qlen = 0;
1318-
1319-
do {
1320-
rtnl_lock();
1321-
qlen = netlink_run_queue(sk, qlen, &rtnetlink_rcv_msg);
1322-
rtnl_unlock();
1323-
} while (qlen);
1317+
rtnl_lock();
1318+
netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
1319+
rtnl_unlock();
13241320
}
13251321

13261322
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)

net/decnet/netfilter/dn_rtmsg.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -115,17 +115,6 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
115115
RCV_SKB_FAIL(-EINVAL);
116116
}
117117

118-
static void dnrmg_receive_user_sk(struct sock *sk, int len)
119-
{
120-
struct sk_buff *skb;
121-
unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
122-
123-
for (; qlen && (skb = skb_dequeue(&sk->sk_receive_queue)); qlen--) {
124-
dnrmg_receive_user_skb(skb);
125-
kfree_skb(skb);
126-
}
127-
}
128-
129118
static struct nf_hook_ops dnrmg_ops = {
130119
.hook = dnrmg_hook,
131120
.pf = PF_DECnet,
@@ -139,7 +128,8 @@ static int __init dn_rtmsg_init(void)
139128

140129
dnrmg = netlink_kernel_create(&init_net,
141130
NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
142-
dnrmg_receive_user_sk, NULL, THIS_MODULE);
131+
dnrmg_receive_user_skb,
132+
NULL, THIS_MODULE);
143133
if (dnrmg == NULL) {
144134
printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
145135
return -ENOMEM;

net/ipv4/fib_frontend.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
6262
#define FIB_TABLE_HASHSZ 256
6363
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
6464

65+
static struct sock *fibnl = NULL;
66+
67+
6568
struct fib_table *fib_new_table(u32 id)
6669
{
6770
struct fib_table *tb;
@@ -811,13 +814,13 @@ static void nl_fib_input(struct sock *sk, int len)
811814
pid = NETLINK_CB(skb).pid; /* pid of sending process */
812815
NETLINK_CB(skb).pid = 0; /* from kernel */
813816
NETLINK_CB(skb).dst_group = 0; /* unicast */
814-
netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
817+
netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
815818
}
816819

817820
static void nl_fib_lookup_init(void)
818821
{
819-
netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, nl_fib_input,
820-
NULL, THIS_MODULE);
822+
fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
823+
nl_fib_input, NULL, THIS_MODULE);
821824
}
822825

823826
static void fib_disable_ip(struct net_device *dev, int force)

net/ipv4/inet_diag.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -839,15 +839,11 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
839839

840840
static DEFINE_MUTEX(inet_diag_mutex);
841841

842-
static void inet_diag_rcv(struct sock *sk, int len)
842+
static void inet_diag_rcv(struct sk_buff *skb)
843843
{
844-
unsigned int qlen = 0;
845-
846-
do {
847-
mutex_lock(&inet_diag_mutex);
848-
qlen = netlink_run_queue(sk, qlen, &inet_diag_rcv_msg);
849-
mutex_unlock(&inet_diag_mutex);
850-
} while (qlen);
844+
mutex_lock(&inet_diag_mutex);
845+
netlink_rcv_skb(skb, &inet_diag_rcv_msg);
846+
mutex_unlock(&inet_diag_mutex);
851847
}
852848

853849
static DEFINE_SPINLOCK(inet_diag_register_lock);

net/ipv4/netfilter/ip_queue.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ ipq_dev_drop(int ifindex)
475475
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
476476

477477
static inline void
478-
ipq_rcv_skb(struct sk_buff *skb)
478+
__ipq_rcv_skb(struct sk_buff *skb)
479479
{
480480
int status, type, pid, flags, nlmsglen, skblen;
481481
struct nlmsghdr *nlh;
@@ -533,19 +533,10 @@ ipq_rcv_skb(struct sk_buff *skb)
533533
}
534534

535535
static void
536-
ipq_rcv_sk(struct sock *sk, int len)
536+
ipq_rcv_skb(struct sk_buff *skb)
537537
{
538-
struct sk_buff *skb;
539-
unsigned int qlen;
540-
541538
mutex_lock(&ipqnl_mutex);
542-
543-
for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
544-
skb = skb_dequeue(&sk->sk_receive_queue);
545-
ipq_rcv_skb(skb);
546-
kfree_skb(skb);
547-
}
548-
539+
__ipq_rcv_skb(skb);
549540
mutex_unlock(&ipqnl_mutex);
550541
}
551542

@@ -670,7 +661,7 @@ static int __init ip_queue_init(void)
670661

671662
netlink_register_notifier(&ipq_nl_notifier);
672663
ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
673-
ipq_rcv_sk, NULL, THIS_MODULE);
664+
ipq_rcv_skb, NULL, THIS_MODULE);
674665
if (ipqnl == NULL) {
675666
printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
676667
goto cleanup_netlink_notifier;

0 commit comments

Comments
 (0)