Skip to content

Commit 637bc8b

Browse files
Josef Bacik and davem330
authored and committed
inet: reset tb->fastreuseport when adding a reuseport sk
If we have non reuseport sockets on a tb we will set tb->fastreuseport to 0 and never set it again. This means that if we later end up adding a bunch of reuseport sk's to that tb, we'll have to do the expensive scan every time. Instead, add the ipv4/ipv6 saddr fields to the bind bucket, as well as the family so we know what comparison to make, and the ipv6 only setting so we can make sure to compare with new sockets appropriately. Once one sk has made it onto the list we know that there are no potential bind conflicts on the owners list that match that sk's rcv_addr. So copy the sk's information into our bind bucket and set tb->fastreuseport to FASTREUSEPORT_STRICT so we know we have to do an extra check for subsequent reuseport sockets and skip the expensive bind conflict check. Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 289141b commit 637bc8b

File tree

2 files changed

+95
-20
lines changed

2 files changed

+95
-20
lines changed

include/net/inet_hashtables.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,21 @@ struct inet_ehash_bucket {
7474
* users logged onto your box, isn't it nice to know that new data
7575
* ports are created in O(1) time? I thought so. ;-) -DaveM
7676
*/
77+
/* Values stored in inet_bind_bucket.fastreuseport (0 or less disables the
 * reuseport fast path).
 *
 * FASTREUSEPORT_ANY:    sk_reuseport_match() accepts a matching reuseport
 *                       socket without comparing receive addresses.
 * FASTREUSEPORT_STRICT: the bucket's fast path was re-enabled after a reset;
 *                       sk_reuseport_match() must also compare the socket's
 *                       receive address with the cached fast_* address.
 */
#define FASTREUSEPORT_ANY 1
78+
#define FASTREUSEPORT_STRICT 2
79+
7780
/* Per-port bind bucket. Besides the port and its list of owning sockets,
 * it caches the reuseport identity (uid, receive address, ipv6-only flag)
 * of one socket known not to conflict with the owners, so that later
 * reuseport binds can be compared against the cache instead of the list.
 */
struct inet_bind_bucket {
7881
possible_net_t ib_net;
7982
unsigned short port;
8083
signed char fastreuse;
8184
/* 0, FASTREUSEPORT_ANY or FASTREUSEPORT_STRICT. */
signed char fastreuseport;
8285
/* uid of the socket that enabled the reuseport fast path. */
kuid_t fastuid;
86+
#if IS_ENABLED(CONFIG_IPV6)
87+
/* Copy of that socket's sk_v6_rcv_saddr. */
struct in6_addr fast_v6_rcv_saddr;
88+
#endif
89+
/* Copy of that socket's sk_rcv_saddr. */
__be32 fast_rcv_saddr;
90+
/* Selects the IPv6 or IPv4 comparison in sk_reuseport_match().
 * NOTE(review): no hunk visible here assigns this field when the other
 * fast_* fields are stored -- confirm it is set from sk->sk_family.
 */
unsigned short fast_sk_family;
91+
/* Copy of ipv6_only_sock() for that socket. */
bool fast_ipv6_only;
8392
struct hlist_node node;
8493
struct hlist_head owners;
8594
};

net/ipv4/inet_connection_sock.c

Lines changed: 86 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,21 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
3838
* IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
3939
* and 0.0.0.0 equals to 0.0.0.0 only
4040
*/
41-
static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
41+
static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
42+
const struct in6_addr *sk2_rcv_saddr6,
43+
__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
44+
bool sk1_ipv6only, bool sk2_ipv6only,
4245
bool match_wildcard)
4346
{
44-
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
45-
int sk2_ipv6only = inet_v6_ipv6only(sk2);
46-
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
47+
int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
4748
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
4849

4950
/* if both are mapped, treat as IPv4 */
5051
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
5152
if (!sk2_ipv6only) {
52-
if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
53+
if (sk1_rcv_saddr == sk2_rcv_saddr)
5354
return 1;
54-
if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
55+
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
5556
return match_wildcard;
5657
}
5758
return 0;
@@ -65,11 +66,11 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
6566
return 1;
6667

6768
if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
68-
!(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
69+
!(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
6970
return 1;
7071

7172
if (sk2_rcv_saddr6 &&
72-
ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
73+
ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
7374
return 1;
7475

7576
return 0;
@@ -80,13 +81,13 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
8081
* match_wildcard == false: addresses must be exactly the same, i.e.
8182
* 0.0.0.0 only equals to 0.0.0.0
8283
*/
83-
static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
84-
bool match_wildcard)
84+
static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
85+
bool sk2_ipv6only, bool match_wildcard)
8586
{
86-
if (!ipv6_only_sock(sk2)) {
87-
if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
87+
if (!sk2_ipv6only) {
88+
if (sk1_rcv_saddr == sk2_rcv_saddr)
8889
return 1;
89-
if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
90+
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
9091
return match_wildcard;
9192
}
9293
return 0;
@@ -97,9 +98,16 @@ int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
9798
{
9899
#if IS_ENABLED(CONFIG_IPV6)
99100
if (sk->sk_family == AF_INET6)
100-
return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard);
101+
return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
102+
&sk2->sk_v6_rcv_saddr,
103+
sk->sk_rcv_saddr,
104+
sk2->sk_rcv_saddr,
105+
ipv6_only_sock(sk),
106+
ipv6_only_sock(sk2),
107+
match_wildcard);
101108
#endif
102-
return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard);
109+
return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
110+
ipv6_only_sock(sk2), match_wildcard);
103111
}
104112
EXPORT_SYMBOL(inet_rcv_saddr_equal);
105113

@@ -234,6 +242,39 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
234242
return head;
235243
}
236244

245+
/* Decide whether @sk may take the reuseport fast path on bind bucket @tb,
 * i.e. be accepted without the caller running inet_csk_bind_conflict().
 *
 * Returns 0 when the fast path does not apply: tb->fastreuseport is not
 * positive, @sk does not have sk_reuseport set, @sk has a sk_reuseport_cb
 * attached, or @sk's uid differs from tb->fastuid.
 * Returns 1 when tb->fastreuseport is FASTREUSEPORT_ANY.
 * Otherwise returns the result of comparing the address cached in @tb with
 * @sk's receive address (wildcard matching enabled).
 *
 * NOTE(review): tb->fast_sk_family is read below, but none of the visible
 * hunks that store the other fast_* fields assign it -- confirm it is set.
 * NOTE(review): &sk->sk_v6_rcv_saddr is passed unconditionally, while
 * ipv6_rcv_saddr_equal() still tests its second address pointer for NULL
 * (the removed code obtained it via inet6_rcv_saddr()) -- verify the
 * behaviour for an AF_INET @sk.
 */
static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
246+
struct sock *sk)
247+
{
248+
kuid_t uid = sock_i_uid(sk);
249+
250+
if (tb->fastreuseport <= 0)
251+
return 0;
252+
if (!sk->sk_reuseport)
253+
return 0;
254+
if (rcu_access_pointer(sk->sk_reuseport_cb))
255+
return 0;
256+
if (!uid_eq(tb->fastuid, uid))
257+
return 0;
258+
/* We only need to check the rcv_saddr if this tb was once marked
259+
* without fastreuseport and then was reset, as we can only know that
260+
* the fast_*rcv_saddr doesn't have any conflicts with the socks on the
261+
* owners list.
262+
*/
263+
if (tb->fastreuseport == FASTREUSEPORT_ANY)
264+
return 1;
265+
#if IS_ENABLED(CONFIG_IPV6)
266+
/* Cached socket was IPv6: use the IPv6-aware comparison. */
if (tb->fast_sk_family == AF_INET6)
267+
return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
268+
&sk->sk_v6_rcv_saddr,
269+
tb->fast_rcv_saddr,
270+
sk->sk_rcv_saddr,
271+
tb->fast_ipv6_only,
272+
ipv6_only_sock(sk), true);
273+
#endif
274+
/* Otherwise compare the IPv4 receive addresses only. */
return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
275+
ipv6_only_sock(sk), true);
276+
}
277+
237278
/* Obtain a reference to a local port for the given sock,
238279
* if snum is zero it means select any available local port.
239280
* We try to allocate an odd port (and leave even ports for connect())
@@ -273,9 +314,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
273314
goto success;
274315

275316
if ((tb->fastreuse > 0 && reuse) ||
276-
(tb->fastreuseport > 0 &&
277-
!rcu_access_pointer(sk->sk_reuseport_cb) &&
278-
sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
317+
sk_reuseport_match(tb, sk))
279318
goto success;
280319
if (inet_csk_bind_conflict(sk, tb, true, true))
281320
goto fail_unlock;
@@ -284,16 +323,43 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
284323
if (!hlist_empty(&tb->owners)) {
285324
tb->fastreuse = reuse;
286325
if (sk->sk_reuseport) {
287-
tb->fastreuseport = 1;
326+
tb->fastreuseport = FASTREUSEPORT_ANY;
288327
tb->fastuid = uid;
328+
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
329+
tb->fast_ipv6_only = ipv6_only_sock(sk);
330+
#if IS_ENABLED(CONFIG_IPV6)
331+
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
332+
#endif
289333
} else {
290334
tb->fastreuseport = 0;
291335
}
292336
} else {
293337
if (!reuse)
294338
tb->fastreuse = 0;
295-
if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
339+
if (sk->sk_reuseport) {
340+
/* We didn't match or we don't have fastreuseport set on
341+
* the tb, but we have sk_reuseport set on this socket
342+
* and we know that there are no bind conflicts with
343+
* this socket in this tb, so reset our tb's reuseport
344+
* settings so that any subsequent sockets that match
345+
* our current socket will be put on the fast path.
346+
*
347+
* If we reset we need to set FASTREUSEPORT_STRICT so we
348+
* do extra checking for all subsequent sk_reuseport
349+
* socks.
350+
*/
351+
if (!sk_reuseport_match(tb, sk)) {
352+
tb->fastreuseport = FASTREUSEPORT_STRICT;
353+
tb->fastuid = uid;
354+
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
355+
tb->fast_ipv6_only = ipv6_only_sock(sk);
356+
#if IS_ENABLED(CONFIG_IPV6)
357+
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
358+
#endif
359+
}
360+
} else {
296361
tb->fastreuseport = 0;
362+
}
297363
}
298364
if (!inet_csk(sk)->icsk_bind_hash)
299365
inet_bind_hash(sk, tb, port);

0 commit comments

Comments
 (0)