Skip to content

Commit d826eb1

Browse files
Eric Dumazet authored and davem330 committed
ipv4: PKTINFO doesn't need dst reference
Le lundi 07 novembre 2011 à 15:33 +0100, Eric Dumazet a écrit :
> At least, in recent kernels we don't change dst->refcnt in the forwarding
> path (using NOREF skb->dst).
>
> One particular point is the atomic_inc(dst->refcnt) we have to perform
> when queuing a UDP packet if the socket asked for PKTINFO stuff (for example, a
> typical DNS server has to set up this option).
>
> I have one patch somewhere that stores the information in skb->cb[] and
> avoids the atomic_{inc|dec}(dst->refcnt).
>

OK, I found it, I did some extra tests and believe it's ready.

[PATCH net-next] ipv4: IP_PKTINFO doesn't need dst reference

When a socket uses IP_PKTINFO notifications, we currently force a dst reference for each received skb. The reader has to access dst to get the needed information (rt_iif & rt_spec_dst) and must release the dst reference.

We also forced a dst reference if the skb was put in the socket backlog, even without IP_PKTINFO handling. This happens under stress/load.

We can instead store the needed information in skb->cb[], so that only the softirq handler really accesses dst, improving cache hit ratios.

This removes two atomic operations per packet, and false sharing as well.

On a benchmark using a single-threaded receiver (doing only recvmsg() calls), I can reach 720,000 pps instead of 570,000 pps.

IP_PKTINFO is typically used by DNS servers, and any multihomed-aware UDP application.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent acb32ba commit d826eb1

File tree

6 files changed

+28
-22
lines changed

6 files changed

+28
-22
lines changed

include/net/ip.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ extern int ip_options_rcv_srr(struct sk_buff *skb);
450450
* Functions provided by ip_sockglue.c
451451
*/
452452

453-
extern int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
453+
extern void ipv4_pktinfo_prepare(struct sk_buff *skb);
454454
extern void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb);
455455
extern int ip_cmsg_send(struct net *net,
456456
struct msghdr *msg, struct ipcm_cookie *ipc);

net/ipv4/ip_sockglue.c

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -55,20 +55,13 @@
5555
/*
5656
* SOL_IP control messages.
5757
*/
58+
#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
5859

5960
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
6061
{
61-
struct in_pktinfo info;
62-
struct rtable *rt = skb_rtable(skb);
62+
struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
6363

6464
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
65-
if (rt) {
66-
info.ipi_ifindex = rt->rt_iif;
67-
info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
68-
} else {
69-
info.ipi_ifindex = 0;
70-
info.ipi_spec_dst.s_addr = 0;
71-
}
7265

7366
put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
7467
}
@@ -992,20 +985,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
992985
}
993986

994987
/**
995-
* ip_queue_rcv_skb - Queue an skb into sock receive queue
988+
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
996989
* @sk: socket
997990
* @skb: buffer
998991
*
999-
* Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
1000-
* is not set, we drop skb dst entry now, while dst cache line is hot.
992+
* To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
993+
* in skb->cb[] before dst drop.
994+
* This way, the receiver doesn't incur cache line misses to read rtable.
1001995
*/
1002-
int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
996+
void ipv4_pktinfo_prepare(struct sk_buff *skb)
1003997
{
1004-
if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
1005-
skb_dst_drop(skb);
1006-
return sock_queue_rcv_skb(sk, skb);
998+
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
999+
const struct rtable *rt = skb_rtable(skb);
1000+
1001+
if (rt) {
1002+
pktinfo->ipi_ifindex = rt->rt_iif;
1003+
pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
1004+
} else {
1005+
pktinfo->ipi_ifindex = 0;
1006+
pktinfo->ipi_spec_dst.s_addr = 0;
1007+
}
1008+
skb_dst_drop(skb);
10071009
}
1008-
EXPORT_SYMBOL(ip_queue_rcv_skb);
10091010

10101011
int ip_setsockopt(struct sock *sk, int level,
10111012
int optname, char __user *optval, unsigned int optlen)

net/ipv4/raw.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
292292
{
293293
/* Charge it to the socket. */
294294

295-
if (ip_queue_rcv_skb(sk, skb) < 0) {
295+
ipv4_pktinfo_prepare(skb);
296+
if (sock_queue_rcv_skb(sk, skb) < 0) {
296297
kfree_skb(skb);
297298
return NET_RX_DROP;
298299
}

net/ipv4/udp.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1357,7 +1357,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
13571357
if (inet_sk(sk)->inet_daddr)
13581358
sock_rps_save_rxhash(sk, skb);
13591359

1360-
rc = ip_queue_rcv_skb(sk, skb);
1360+
rc = sock_queue_rcv_skb(sk, skb);
13611361
if (rc < 0) {
13621362
int is_udplite = IS_UDPLITE(sk);
13631363

@@ -1473,6 +1473,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
14731473

14741474
rc = 0;
14751475

1476+
ipv4_pktinfo_prepare(skb);
14761477
bh_lock_sock(sk);
14771478
if (!sock_owned_by_user(sk))
14781479
rc = __udp_queue_rcv_skb(sk, skb);

net/ipv6/raw.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,8 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
383383
}
384384

385385
/* Charge it to the socket. */
386-
if (ip_queue_rcv_skb(sk, skb) < 0) {
386+
skb_dst_drop(skb);
387+
if (sock_queue_rcv_skb(sk, skb) < 0) {
387388
kfree_skb(skb);
388389
return NET_RX_DROP;
389390
}

net/ipv6/udp.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
538538
goto drop;
539539
}
540540

541-
if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
541+
skb_dst_drop(skb);
542+
rc = sock_queue_rcv_skb(sk, skb);
543+
if (rc < 0) {
542544
/* Note that an ENOMEM error is charged twice */
543545
if (rc == -ENOMEM)
544546
UDP6_INC_STATS_BH(sock_net(sk),

0 commit comments

Comments
 (0)