Skip to content

Commit 0856a30

Browse files
pdxChen, davem330
authored and committed
Scm: Remove unnecessary pid & credential references in Unix socket's send and receive path
Patch series 109f6e3..7361c36 back in 2.6.36 added functionality to allow credentials to work across pid namespaces for packets sent via UNIX sockets. However, the atomic reference counts on pid and credentials caused plenty of cache bouncing when there are numerous threads of the same pid sharing a UNIX socket. This patch mitigates the problem by eliminating extraneous reference counts on pid and credentials on both the send and receive paths of UNIX sockets. I found a 2x improvement in hackbench's threaded case. On the receive path in unix_dgram_recvmsg, there is currently an increment of the reference counts on pid and credentials in scm_set_cred. Then there are two decrements of the reference counts: once in scm_recv and once when skb_free_datagram calls the skb->destructor function unix_destruct_scm. One pair of increment and decrement of the ref counts on pid and credentials can be eliminated from the receive path, because the reference we took when we created the skb on the send side remains held until the skb is destroyed. On the send path, there are two increments of the ref counts on pid and credentials, once in scm_send and once in unix_scm_to_skb. Then there is a decrement of the reference counts in scm_destroy's call to scm_destroy_cred at the end of unix_dgram_sendmsg functions. One pair of increment and decrement of the reference counts can be removed so we only need to increment the ref counts once. By incorporating these changes, for hackbench running on a 4 socket NHM-EX machine with 40 cores, the execution of hackbench on 50 groups of 20 threads sped up by a factor of 2. Hackbench command used for testing: ./hackbench 50 thread 2000 Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 6af29cc commit 0856a30

File tree

2 files changed

+48
-19
lines changed

2 files changed

+48
-19
lines changed

include/net/scm.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm,
5353
cred_to_ucred(pid, cred, &scm->creds);
5454
}
5555

56+
static __inline__ void scm_set_cred_noref(struct scm_cookie *scm,
57+
struct pid *pid, const struct cred *cred)
58+
{
59+
scm->pid = pid;
60+
scm->cred = cred;
61+
cred_to_ucred(pid, cred, &scm->creds);
62+
}
63+
5664
static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
5765
{
5866
put_pid(scm->pid);
@@ -70,6 +78,15 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
7078
__scm_destroy(scm);
7179
}
7280

81+
static __inline__ void scm_release(struct scm_cookie *scm)
82+
{
83+
/* keep ref on pid and cred */
84+
scm->pid = NULL;
85+
scm->cred = NULL;
86+
if (scm->fp)
87+
__scm_destroy(scm);
88+
}
89+
7390
static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
7491
struct scm_cookie *scm)
7592
{
@@ -108,15 +125,14 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
108125
if (!msg->msg_control) {
109126
if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp)
110127
msg->msg_flags |= MSG_CTRUNC;
111-
scm_destroy(scm);
128+
if (scm && scm->fp)
129+
__scm_destroy(scm);
112130
return;
113131
}
114132

115133
if (test_bit(SOCK_PASSCRED, &sock->flags))
116134
put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds);
117135

118-
scm_destroy_cred(scm);
119-
120136
scm_passec(sock, msg, scm);
121137

122138
if (!scm->fp)

net/unix/af_unix.c

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,11 +1378,17 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
13781378
return max_level;
13791379
}
13801380

1381-
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1381+
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb,
1382+
bool send_fds, bool ref)
13821383
{
13831384
int err = 0;
1384-
UNIXCB(skb).pid = get_pid(scm->pid);
1385-
UNIXCB(skb).cred = get_cred(scm->cred);
1385+
if (ref) {
1386+
UNIXCB(skb).pid = get_pid(scm->pid);
1387+
UNIXCB(skb).cred = get_cred(scm->cred);
1388+
} else {
1389+
UNIXCB(skb).pid = scm->pid;
1390+
UNIXCB(skb).cred = scm->cred;
1391+
}
13861392
UNIXCB(skb).fp = NULL;
13871393
if (scm->fp && send_fds)
13881394
err = unix_attach_fds(scm, skb);
@@ -1407,7 +1413,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
14071413
int namelen = 0; /* fake GCC */
14081414
int err;
14091415
unsigned hash;
1410-
struct sk_buff *skb;
1416+
struct sk_buff *skb = NULL;
14111417
long timeo;
14121418
struct scm_cookie tmp_scm;
14131419
int max_level;
@@ -1448,7 +1454,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
14481454
if (skb == NULL)
14491455
goto out;
14501456

1451-
err = unix_scm_to_skb(siocb->scm, skb, true);
1457+
err = unix_scm_to_skb(siocb->scm, skb, true, false);
14521458
if (err < 0)
14531459
goto out_free;
14541460
max_level = err + 1;
@@ -1544,7 +1550,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
15441550
unix_state_unlock(other);
15451551
other->sk_data_ready(other, len);
15461552
sock_put(other);
1547-
scm_destroy(siocb->scm);
1553+
scm_release(siocb->scm);
15481554
return len;
15491555

15501556
out_unlock:
@@ -1554,7 +1560,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
15541560
out:
15551561
if (other)
15561562
sock_put(other);
1557-
scm_destroy(siocb->scm);
1563+
if (skb == NULL)
1564+
scm_destroy(siocb->scm);
15581565
return err;
15591566
}
15601567

@@ -1566,7 +1573,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
15661573
struct sock *sk = sock->sk;
15671574
struct sock *other = NULL;
15681575
int err, size;
1569-
struct sk_buff *skb;
1576+
struct sk_buff *skb = NULL;
15701577
int sent = 0;
15711578
struct scm_cookie tmp_scm;
15721579
bool fds_sent = false;
@@ -1631,19 +1638,19 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16311638
size = min_t(int, size, skb_tailroom(skb));
16321639

16331640

1634-
/* Only send the fds in the first buffer */
1635-
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1641+
/* Only send the fds and no ref to pid in the first buffer */
1642+
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent, fds_sent);
16361643
if (err < 0) {
16371644
kfree_skb(skb);
1638-
goto out_err;
1645+
goto out;
16391646
}
16401647
max_level = err + 1;
16411648
fds_sent = true;
16421649

16431650
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
16441651
if (err) {
16451652
kfree_skb(skb);
1646-
goto out_err;
1653+
goto out;
16471654
}
16481655

16491656
unix_state_lock(other);
@@ -1660,7 +1667,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16601667
sent += size;
16611668
}
16621669

1663-
scm_destroy(siocb->scm);
1670+
if (skb)
1671+
scm_release(siocb->scm);
1672+
else
1673+
scm_destroy(siocb->scm);
16641674
siocb->scm = NULL;
16651675

16661676
return sent;
@@ -1673,7 +1683,9 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16731683
send_sig(SIGPIPE, current, 0);
16741684
err = -EPIPE;
16751685
out_err:
1676-
scm_destroy(siocb->scm);
1686+
if (skb == NULL)
1687+
scm_destroy(siocb->scm);
1688+
out:
16771689
siocb->scm = NULL;
16781690
return sent ? : err;
16791691
}
@@ -1777,7 +1789,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
17771789
siocb->scm = &tmp_scm;
17781790
memset(&tmp_scm, 0, sizeof(tmp_scm));
17791791
}
1780-
scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1792+
scm_set_cred_noref(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
17811793
unix_set_secdata(siocb->scm, skb);
17821794

17831795
if (!(flags & MSG_PEEK)) {
@@ -1939,7 +1951,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
19391951
}
19401952
} else {
19411953
/* Copy credentials */
1942-
scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1954+
scm_set_cred_noref(siocb->scm, UNIXCB(skb).pid,
1955+
UNIXCB(skb).cred);
19431956
check_creds = 1;
19441957
}
19451958

0 commit comments

Comments
 (0)