
Commit 5284143

Merge branch 'listener_refactor_part_12'
Eric Dumazet says:

====================
inet: tcp listener refactoring, part 12

By adding a pointer back to the listener, we are preparing synack rtx handling
to no longer be governed by the listener keepalive timer, as this is the most
problematic source of contention on the listener spinlock.

Note that TCP FastOpen had such a pointer anyway, so we make it generic.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 9f2dbdd + 0470c8c commit 5284143
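
Before the per-file diffs, a minimal userspace sketch of the ownership rule this part of the series generalizes: every request sock now keeps a pointer to its listener and pins it with a reference for its whole lifetime, with the hold taken in reqsk_alloc() and the put done in reqsk_free(), instead of only TCP Fast Open requests doing so. The names below (listener, request, listener_hold, listener_put) are illustrative models, not kernel API, and the sketch assumes nothing beyond standard C.

/* Minimal model: a request pins its listener from alloc to free. */
#include <stdio.h>
#include <stdlib.h>

struct listener {
	int refcnt;			/* models sk_refcnt */
};

struct request {
	struct listener *rsk_listener;	/* models req->rsk_listener */
};

void listener_hold(struct listener *l) { l->refcnt++; }	/* models sock_hold() */
void listener_put(struct listener *l)  { l->refcnt--; }	/* models sock_put() */

struct request *request_alloc(struct listener *l)
{
	struct request *req = malloc(sizeof(*req));

	if (req) {
		listener_hold(l);		/* taken at allocation, as reqsk_alloc() now does */
		req->rsk_listener = l;
	}
	return req;
}

void request_free(struct request *req)
{
	listener_put(req->rsk_listener);	/* released at free, as reqsk_free() now does */
	free(req);
}

int main(void)
{
	struct listener lsk = { .refcnt = 1 };
	struct request *req = request_alloc(&lsk);

	printf("listener refcnt while request alive: %d\n", lsk.refcnt);	/* 2 */
	request_free(req);
	printf("listener refcnt after request freed: %d\n", lsk.refcnt);	/* 1 */
	return 0;
}

Because the listener stays pinned for as long as any of its requests exist, a later patch can retransmit SYNACKs from a per-request context without relying on the listener keepalive timer keeping the listener alive.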

File tree

12 files changed: +77 -73 lines changed


include/linux/tcp.h

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@ struct tcp_request_sock_ops;
 struct tcp_request_sock {
 	struct inet_request_sock	req;
 	const struct tcp_request_sock_ops *af_specific;
-	struct sock	*listener;	/* needed for TFO */
+	bool		tfo_listener;
 	u32	rcv_isn;
 	u32	snt_isn;
 	u32	snt_synack;	/* synack sent time */

include/net/inet_connection_sock.h

Lines changed: 0 additions & 5 deletions
@@ -275,11 +275,6 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk,
 					    struct sock *child)
 {
 	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
-	/* before letting lookups find us, make sure all req fields
-	 * are committed to memory.
-	 */
-	smp_wmb();
-	atomic_set(&req->rsk_refcnt, 1);
 }
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,

include/net/inet_sock.h

Lines changed: 2 additions & 20 deletions
@@ -81,7 +81,6 @@ struct inet_request_sock {
 #define ir_cookie	req.__req_common.skc_cookie
 #define ireq_net	req.__req_common.skc_net
 #define ireq_state	req.__req_common.skc_state
-#define ireq_refcnt	req.__req_common.skc_refcnt
 #define ireq_family	req.__req_common.skc_family
 
 	kmemcheck_bitfield_begin(flags);
@@ -244,25 +243,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
 			    initval);
 }
 
-static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
-{
-	struct request_sock *req = reqsk_alloc(ops);
-	struct inet_request_sock *ireq = inet_rsk(req);
-
-	if (req != NULL) {
-		kmemcheck_annotate_bitfield(ireq, flags);
-		ireq->opt = NULL;
-		atomic64_set(&ireq->ir_cookie, 0);
-		ireq->ireq_state = TCP_NEW_SYN_RECV;
-
-		/* Following is temporary. It is coupled with debugging
-		 * helpers in reqsk_put() & reqsk_free()
-		 */
-		atomic_set(&ireq->ireq_refcnt, 0);
-	}
-
-	return req;
-}
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener);
 
 static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 {

include/net/request_sock.h

Lines changed: 20 additions & 3 deletions
@@ -52,6 +52,7 @@ struct request_sock {
 #define rsk_refcnt	__req_common.skc_refcnt
 
 	struct request_sock	*dl_next;
+	struct sock		*rsk_listener;
 	u16			mss;
 	u8			num_retrans;	/* number of retransmits */
 	u8			cookie_ts:1;	/* syncookie: encode tcpopts in timestamp */
@@ -67,13 +68,21 @@ struct request_sock {
 	u32			peer_secid;
 };
 
-static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops)
+static inline struct request_sock *
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 {
 	struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC);
 
-	if (req != NULL)
+	if (req) {
 		req->rsk_ops = ops;
-
+		sock_hold(sk_listener);
+		req->rsk_listener = sk_listener;
+
+		/* Following is temporary. It is coupled with debugging
+		 * helpers in reqsk_put() & reqsk_free()
+		 */
+		atomic_set(&req->rsk_refcnt, 0);
+	}
 	return req;
 }
 
@@ -88,6 +97,8 @@ static inline void reqsk_free(struct request_sock *req)
 	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
 
 	req->rsk_ops->destructor(req);
+	if (req->rsk_listener)
+		sock_put(req->rsk_listener);
 	kmem_cache_free(req->rsk_ops->slab, req);
 }
 
@@ -286,6 +297,12 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	req->sk = NULL;
 	req->dl_next = lopt->syn_table[hash];
 
+	/* before letting lookups find us, make sure all req fields
+	 * are committed to memory and refcnt initialized.
+	 */
+	smp_wmb();
+	atomic_set(&req->rsk_refcnt, 1);
+
 	write_lock(&queue->syn_wait_lock);
 	lopt->syn_table[hash] = req;
 	write_unlock(&queue->syn_wait_lock);
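
A note on the ordering this header now carries: reqsk_alloc() leaves rsk_refcnt at 0, and reqsk_queue_hash_req() raises it to 1 only after smp_wmb(), so a lookup that finds the request in the hash table can never observe half-initialized fields. A rough userspace analogue using C11 atomics follows; the release fence stands in for smp_wmb(), and publish()/lookup() are illustrative names, not kernel functions.

/* Rough analogue of "init fields, barrier, then set refcnt to 1". */
#include <stdatomic.h>

struct req {
	int mss;			/* some field initialized before publication */
	atomic_int rsk_refcnt;		/* starts at 0: not yet visible to lookups */
};

void publish(struct req *r, int mss)
{
	r->mss = mss;					/* plain init while refcnt is still 0 */
	atomic_thread_fence(memory_order_release);	/* plays the role of smp_wmb() */
	atomic_store_explicit(&r->rsk_refcnt, 1, memory_order_relaxed);
}

int lookup(struct req *r)
{
	/* a reader that observes refcnt == 1 is guaranteed to see mss as well */
	if (atomic_load_explicit(&r->rsk_refcnt, memory_order_acquire) == 1)
		return r->mss;
	return -1;	/* not published yet */
}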

net/core/request_sock.c

Lines changed: 7 additions & 11 deletions
@@ -153,24 +153,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
  * case might also exist in tcp_v4_hnd_req() that will trigger this locking
  * order.
  *
- * When a TFO req is created, it needs to sock_hold its listener to prevent
- * the latter data structure from going away.
- *
- * This function also sets "treq->listener" to NULL and unreference listener
- * socket. treq->listener is used by the listener so it is protected by the
+ * This function also sets "treq->tfo_listener" to false.
+ * treq->tfo_listener is used by the listener so it is protected by the
  * fastopenq->lock in this function.
  */
 void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 			   bool reset)
 {
-	struct sock *lsk = tcp_rsk(req)->listener;
-	struct fastopen_queue *fastopenq =
-	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
+	struct sock *lsk = req->rsk_listener;
+	struct fastopen_queue *fastopenq;
+
+	fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
 	tcp_sk(sk)->fastopen_rsk = NULL;
 	spin_lock_bh(&fastopenq->lock);
 	fastopenq->qlen--;
-	tcp_rsk(req)->listener = NULL;
+	tcp_rsk(req)->tfo_listener = false;
 	if (req->sk)	/* the child socket hasn't been accepted yet */
 		goto out;
 
@@ -179,7 +177,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	 * special RST handling below.
 	 */
 	spin_unlock_bh(&fastopenq->lock);
-	sock_put(lsk);
 	reqsk_put(req);
 	return;
 }
@@ -201,5 +198,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	fastopenq->qlen++;
 out:
 	spin_unlock_bh(&fastopenq->lock);
-	sock_put(lsk);
 }

net/dccp/ipv4.c

Lines changed: 1 addition & 2 deletions
@@ -624,7 +624,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
 	if (req == NULL)
 		goto drop;
 
@@ -641,7 +641,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
 	ireq->ireq_family = AF_INET;
 	ireq->ir_iif = sk->sk_bound_dev_if;

net/dccp/ipv6.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
386386
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
387387
goto drop;
388388

389-
req = inet_reqsk_alloc(&dccp6_request_sock_ops);
389+
req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
390390
if (req == NULL)
391391
goto drop;
392392

@@ -403,7 +403,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
403403
ireq = inet_rsk(req);
404404
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
405405
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
406-
write_pnet(&ireq->ireq_net, sock_net(sk));
407406
ireq->ireq_family = AF_INET6;
408407

409408
if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||

net/ipv4/inet_connection_sock.c

Lines changed: 7 additions & 6 deletions
@@ -293,8 +293,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
-	struct sock *newsk;
 	struct request_sock *req;
+	struct sock *newsk;
 	int error;
 
 	lock_sock(sk);
@@ -323,9 +323,11 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 	newsk = req->sk;
 
 	sk_acceptq_removed(sk);
-	if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
+	if (sk->sk_protocol == IPPROTO_TCP &&
+	    tcp_rsk(req)->tfo_listener &&
+	    queue->fastopenq) {
 		spin_lock_bh(&queue->fastopenq->lock);
-		if (tcp_rsk(req)->listener) {
+		if (tcp_rsk(req)->tfo_listener) {
 			/* We are still waiting for the final ACK from 3WHS
 			 * so can't free req now. Instead, we set req->sk to
 			 * NULL to signify that the child socket is taken
@@ -817,9 +819,9 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		percpu_counter_inc(sk->sk_prot->orphan_count);
 
-		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
+		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
 			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
-			BUG_ON(sk != tcp_rsk(req)->listener);
+			BUG_ON(sk != req->rsk_listener);
 
 			/* Paranoid, to prevent race condition if
 			 * an inbound pkt destined for child is
@@ -828,7 +830,6 @@ void inet_csk_listen_stop(struct sock *sk)
 			 * tcp_v4_destroy_sock().
 			 */
 			tcp_sk(child)->fastopen_rsk = NULL;
-			sock_put(sk);
 		}
 		inet_csk_destroy_sock(child);

net/ipv4/syncookies.c

Lines changed: 8 additions & 8 deletions
@@ -227,11 +227,12 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	struct sock *child;
 
 	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
-	if (child)
+	if (child) {
+		atomic_set(&req->rsk_refcnt, 1);
 		inet_csk_reqsk_queue_add(sk, req, child);
-	else
+	} else {
 		reqsk_free(req);
-
+	}
 	return child;
 }
 
@@ -325,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
 	if (!req)
 		goto out;
 
@@ -345,8 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->tstamp_ok	= tcp_opt.saw_tstamp;
 	req->ts_recent	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
-	treq->listener	= NULL;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
+	treq->tfo_listener = false;
 	ireq->ireq_family = AF_INET;
 
 	ireq->ir_iif = sk->sk_bound_dev_if;
@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->opt = tcp_v4_save_options(skb);
 
 	if (security_inet_conn_request(sk, skb, req)) {
-		reqsk_put(req);
+		reqsk_free(req);
 		goto out;
 	}
 
@@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
-		reqsk_put(req);
+		reqsk_free(req);
 		goto out;
 	}
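
One consequence is visible in the syncookie error paths above: because a request's own refcount now stays at 0 until it is either hashed or queued, cleanup before that point must go through reqsk_free() (which warns if the refcount is not 0) rather than reqsk_put(). A small userspace model of the two cleanup rules follows; the names mirror but do not quote the kernel helpers.

#include <assert.h>
#include <stdlib.h>

struct req { int refcnt; };		/* refcnt stays 0 until the request is published */

void model_reqsk_free(struct req *r)
{
	assert(r->refcnt == 0);		/* like the WARN_ON_ONCE() in reqsk_free() */
	free(r);
}

void model_reqsk_put(struct req *r)
{
	if (--r->refcnt == 0)		/* drop a reference taken at publication */
		model_reqsk_free(r);
}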

net/ipv4/tcp_fastopen.c

Lines changed: 2 additions & 6 deletions
@@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	tp = tcp_sk(child);
 
 	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
+	tcp_rsk(req)->tfo_listener = true;
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never
 	 * scaled. So correct it appropriately.
@@ -174,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
 				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 
+	atomic_set(&req->rsk_refcnt, 1);
 	/* Add the child socket directly into the accept queue */
 	inet_csk_reqsk_queue_add(sk, req, child);

net/ipv4/tcp_input.c

Lines changed: 22 additions & 3 deletions
@@ -5967,6 +5967,26 @@ static void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_mark = inet_request_mark(sk, skb);
 }
 
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener)
+{
+	struct request_sock *req = reqsk_alloc(ops, sk_listener);
+
+	if (req) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+
+		kmemcheck_annotate_bitfield(ireq, flags);
+		ireq->opt = NULL;
+		atomic64_set(&ireq->ir_cookie, 0);
+		ireq->ireq_state = TCP_NEW_SYN_RECV;
+		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+	}
+
+	return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
@@ -6004,7 +6024,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 	}
 
-	req = inet_reqsk_alloc(rsk_ops);
+	req = inet_reqsk_alloc(rsk_ops, sk);
 	if (!req)
 		goto drop;
 
@@ -6020,7 +6040,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
-	write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk));
 
 	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
 	inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
@@ -6097,7 +6116,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		if (err || want_cookie)
 			goto drop_and_free;
 
-		tcp_rsk(req)->listener = NULL;
+		tcp_rsk(req)->tfo_listener = false;
 		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	}
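
Worth noting for the tcp_input.c hunk above: because inet_reqsk_alloc() now receives the listener socket, it can initialize the request's network namespace with write_pnet(&ireq->ireq_net, sock_net(sk_listener)) at allocation time, which is why the separate write_pnet() calls disappear from the DCCP, syncookie and tcp_conn_request() paths elsewhere in this commit.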

net/ipv6/syncookies.c

Lines changed: 6 additions & 6 deletions
@@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	struct sock *child;
 
 	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
-	if (child)
+	if (child) {
+		atomic_set(&req->rsk_refcnt, 1);
 		inet_csk_reqsk_queue_add(sk, req, child);
-	else
+	} else {
 		reqsk_free(req);
-
+	}
 	return child;
 }
 
@@ -189,14 +190,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp6_request_sock_ops);
+	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
 	if (!req)
 		goto out;
 
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
-	treq->listener = NULL;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
+	treq->tfo_listener = false;
 	ireq->ireq_family = AF_INET6;
 
 	if (security_inet_conn_request(sk, skb, req))
