Skip to content

Commit 9f2dbdd

Browse files
committed
Merge branch 'listener_refactor_part_11'
Eric Dumazet says:

====================
inet: tcp listener refactoring, part 11

Before inserting request sockets into general (ehash) table,
we need to prepare netfilter to cope with them, as they are
not full sockets.

I'll later change xt_socket to get full support, including for
request sockets (NEW_SYN_RECV)

Save 8 bytes in inet_request_sock on 64bit arches. We'll soon add
a pointer to the listener socket.

I included two TCP changes in this patch series.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents c249739 + 7970ddc commit 9f2dbdd

File tree

10 files changed

+102
-85
lines changed

10 files changed

+102
-85
lines changed

include/net/inet_sock.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,25 +94,24 @@ struct inet_request_sock {
9494
acked : 1,
9595
no_srccheck: 1;
9696
kmemcheck_bitfield_end(flags);
97+
u32 ir_mark;
9798
union {
9899
struct ip_options_rcu *opt;
99100
struct sk_buff *pktopts;
100101
};
101-
u32 ir_mark;
102102
};
103103

104104
static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
105105
{
106106
return (struct inet_request_sock *)sk;
107107
}
108108

109-
static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
109+
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
110110
{
111-
if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
111+
if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
112112
return skb->mark;
113-
} else {
114-
return sk->sk_mark;
115-
}
113+
114+
return sk->sk_mark;
116115
}
117116

118117
struct inet_cork {

include/net/tcp.h

Lines changed: 2 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,31 +1137,6 @@ static inline int tcp_full_space(const struct sock *sk)
11371137
return tcp_win_from_space(sk->sk_rcvbuf);
11381138
}
11391139

1140-
static inline void tcp_openreq_init(struct request_sock *req,
1141-
struct tcp_options_received *rx_opt,
1142-
struct sk_buff *skb, struct sock *sk)
1143-
{
1144-
struct inet_request_sock *ireq = inet_rsk(req);
1145-
1146-
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
1147-
req->cookie_ts = 0;
1148-
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
1149-
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
1150-
tcp_rsk(req)->snt_synack = tcp_time_stamp;
1151-
tcp_rsk(req)->last_oow_ack_time = 0;
1152-
req->mss = rx_opt->mss_clamp;
1153-
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
1154-
ireq->tstamp_ok = rx_opt->tstamp_ok;
1155-
ireq->sack_ok = rx_opt->sack_ok;
1156-
ireq->snd_wscale = rx_opt->snd_wscale;
1157-
ireq->wscale_ok = rx_opt->wscale_ok;
1158-
ireq->acked = 0;
1159-
ireq->ecn_ok = 0;
1160-
ireq->ir_rmt_port = tcp_hdr(skb)->source;
1161-
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
1162-
ireq->ir_mark = inet_request_mark(sk, skb);
1163-
}
1164-
11651140
extern void tcp_openreq_init_rwin(struct request_sock *req,
11661141
struct sock *sk, struct dst_entry *dst);
11671142

@@ -1241,36 +1216,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
12411216
return true;
12421217
}
12431218

1244-
/* Return true if we're currently rate-limiting out-of-window ACKs and
1245-
* thus shouldn't send a dupack right now. We rate-limit dupacks in
1246-
* response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
1247-
* attacks that send repeated SYNs or ACKs for the same connection. To
1248-
* do this, we do not send a duplicate SYNACK or ACK if the remote
1249-
* endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
1250-
*/
1251-
static inline bool tcp_oow_rate_limited(struct net *net,
1252-
const struct sk_buff *skb,
1253-
int mib_idx, u32 *last_oow_ack_time)
1254-
{
1255-
/* Data packets without SYNs are not likely part of an ACK loop. */
1256-
if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
1257-
!tcp_hdr(skb)->syn)
1258-
goto not_rate_limited;
1259-
1260-
if (*last_oow_ack_time) {
1261-
s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
1262-
1263-
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
1264-
NET_INC_STATS_BH(net, mib_idx);
1265-
return true; /* rate-limited: don't send yet! */
1266-
}
1267-
}
1268-
1269-
*last_oow_ack_time = tcp_time_stamp;
1270-
1271-
not_rate_limited:
1272-
return false; /* not rate-limited: go ahead, send dupack now! */
1273-
}
1219+
bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
1220+
int mib_idx, u32 *last_oow_ack_time);
12741221

12751222
static inline void tcp_mib_init(struct net *net)
12761223
{

net/ipv4/tcp_input.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3321,6 +3321,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
33213321
return flag;
33223322
}
33233323

3324+
/* Return true if we're currently rate-limiting out-of-window ACKs and
3325+
* thus shouldn't send a dupack right now. We rate-limit dupacks in
3326+
* response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
3327+
* attacks that send repeated SYNs or ACKs for the same connection. To
3328+
* do this, we do not send a duplicate SYNACK or ACK if the remote
3329+
* endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
3330+
*/
3331+
bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3332+
int mib_idx, u32 *last_oow_ack_time)
3333+
{
3334+
/* Data packets without SYNs are not likely part of an ACK loop. */
3335+
if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3336+
!tcp_hdr(skb)->syn)
3337+
goto not_rate_limited;
3338+
3339+
if (*last_oow_ack_time) {
3340+
s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
3341+
3342+
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
3343+
NET_INC_STATS_BH(net, mib_idx);
3344+
return true; /* rate-limited: don't send yet! */
3345+
}
3346+
}
3347+
3348+
*last_oow_ack_time = tcp_time_stamp;
3349+
3350+
not_rate_limited:
3351+
return false; /* not rate-limited: go ahead, send dupack now! */
3352+
}
3353+
33243354
/* RFC 5961 7 [ACK Throttling] */
33253355
static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
33263356
{
@@ -5912,6 +5942,31 @@ static void tcp_ecn_create_request(struct request_sock *req,
59125942
inet_rsk(req)->ecn_ok = 1;
59135943
}
59145944

5945+
static void tcp_openreq_init(struct request_sock *req,
5946+
const struct tcp_options_received *rx_opt,
5947+
struct sk_buff *skb, const struct sock *sk)
5948+
{
5949+
struct inet_request_sock *ireq = inet_rsk(req);
5950+
5951+
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
5952+
req->cookie_ts = 0;
5953+
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
5954+
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5955+
tcp_rsk(req)->snt_synack = tcp_time_stamp;
5956+
tcp_rsk(req)->last_oow_ack_time = 0;
5957+
req->mss = rx_opt->mss_clamp;
5958+
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
5959+
ireq->tstamp_ok = rx_opt->tstamp_ok;
5960+
ireq->sack_ok = rx_opt->sack_ok;
5961+
ireq->snd_wscale = rx_opt->snd_wscale;
5962+
ireq->wscale_ok = rx_opt->wscale_ok;
5963+
ireq->acked = 0;
5964+
ireq->ecn_ok = 0;
5965+
ireq->ir_rmt_port = tcp_hdr(skb)->source;
5966+
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
5967+
ireq->ir_mark = inet_request_mark(sk, skb);
5968+
}
5969+
59155970
int tcp_conn_request(struct request_sock_ops *rsk_ops,
59165971
const struct tcp_request_sock_ops *af_ops,
59175972
struct sock *sk, struct sk_buff *skb)

net/netfilter/ipvs/ip_vs_xmit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
209209
struct sock *sk = skb->sk;
210210
struct rtable *ort = skb_rtable(skb);
211211

212-
if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
212+
if (!skb->dev && sk && sk_fullsock(sk))
213213
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
214214
}
215215

net/netfilter/nf_log_common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
133133

134134
void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
135135
{
136-
if (!sk || sk->sk_state == TCP_TIME_WAIT)
136+
if (!sk || !sk_fullsock(sk))
137137
return;
138138

139139
read_lock_bh(&sk->sk_callback_lock);

net/netfilter/nfnetlink_log.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ __build_packet_message(struct nfnl_log_net *log,
539539

540540
/* UID */
541541
sk = skb->sk;
542-
if (sk && sk->sk_state != TCP_TIME_WAIT) {
542+
if (sk && sk_fullsock(sk)) {
543543
read_lock_bh(&sk->sk_callback_lock);
544544
if (sk->sk_socket && sk->sk_socket->file) {
545545
struct file *file = sk->sk_socket->file;

net/netfilter/nfnetlink_queue_core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
257257
{
258258
const struct cred *cred;
259259

260-
if (sk->sk_state == TCP_TIME_WAIT)
260+
if (!sk_fullsock(sk))
261261
return 0;
262262

263263
read_lock_bh(&sk->sk_callback_lock);

net/netfilter/nft_meta.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
8383
*(u16 *)dest->data = out->type;
8484
break;
8585
case NFT_META_SKUID:
86-
if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
86+
if (skb->sk == NULL || !sk_fullsock(skb->sk))
8787
goto err;
8888

8989
read_lock_bh(&skb->sk->sk_callback_lock);
@@ -99,7 +99,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
9999
read_unlock_bh(&skb->sk->sk_callback_lock);
100100
break;
101101
case NFT_META_SKGID:
102-
if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
102+
if (skb->sk == NULL || !sk_fullsock(skb->sk))
103103
goto err;
104104

105105
read_lock_bh(&skb->sk->sk_callback_lock);

net/netfilter/xt_TPROXY.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t {
4242

4343
static bool tproxy_sk_is_transparent(struct sock *sk)
4444
{
45-
if (sk->sk_state != TCP_TIME_WAIT) {
46-
if (inet_sk(sk)->transparent)
47-
return true;
48-
sock_put(sk);
49-
} else {
45+
switch (sk->sk_state) {
46+
case TCP_TIME_WAIT:
5047
if (inet_twsk(sk)->tw_transparent)
5148
return true;
52-
inet_twsk_put(inet_twsk(sk));
49+
break;
50+
case TCP_NEW_SYN_RECV:
51+
if (inet_rsk(inet_reqsk(sk))->no_srccheck)
52+
return true;
53+
break;
54+
default:
55+
if (inet_sk(sk)->transparent)
56+
return true;
5357
}
58+
59+
sock_gen_put(sk);
5460
return false;
5561
}
5662

net/netfilter/xt_socket.c

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,20 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol,
129129
return NULL;
130130
}
131131

132+
static bool xt_socket_sk_is_transparent(struct sock *sk)
133+
{
134+
switch (sk->sk_state) {
135+
case TCP_TIME_WAIT:
136+
return inet_twsk(sk)->tw_transparent;
137+
138+
case TCP_NEW_SYN_RECV:
139+
return inet_rsk(inet_reqsk(sk))->no_srccheck;
140+
141+
default:
142+
return inet_sk(sk)->transparent;
143+
}
144+
}
145+
132146
static bool
133147
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
134148
const struct xt_socket_mtinfo1 *info)
@@ -195,16 +209,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
195209
* unless XT_SOCKET_NOWILDCARD is set
196210
*/
197211
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
198-
sk->sk_state != TCP_TIME_WAIT &&
212+
sk_fullsock(sk) &&
199213
inet_sk(sk)->inet_rcv_saddr == 0);
200214

201215
/* Ignore non-transparent sockets,
202-
if XT_SOCKET_TRANSPARENT is used */
216+
* if XT_SOCKET_TRANSPARENT is used
217+
*/
203218
if (info->flags & XT_SOCKET_TRANSPARENT)
204-
transparent = ((sk->sk_state != TCP_TIME_WAIT &&
205-
inet_sk(sk)->transparent) ||
206-
(sk->sk_state == TCP_TIME_WAIT &&
207-
inet_twsk(sk)->tw_transparent));
219+
transparent = xt_socket_sk_is_transparent(sk);
208220

209221
if (sk != skb->sk)
210222
sock_gen_put(sk);
@@ -363,16 +375,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
363375
* unless XT_SOCKET_NOWILDCARD is set
364376
*/
365377
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
366-
sk->sk_state != TCP_TIME_WAIT &&
378+
sk_fullsock(sk) &&
367379
ipv6_addr_any(&sk->sk_v6_rcv_saddr));
368380

369381
/* Ignore non-transparent sockets,
370-
if XT_SOCKET_TRANSPARENT is used */
382+
* if XT_SOCKET_TRANSPARENT is used
383+
*/
371384
if (info->flags & XT_SOCKET_TRANSPARENT)
372-
transparent = ((sk->sk_state != TCP_TIME_WAIT &&
373-
inet_sk(sk)->transparent) ||
374-
(sk->sk_state == TCP_TIME_WAIT &&
375-
inet_twsk(sk)->tw_transparent));
385+
transparent = xt_socket_sk_is_transparent(sk);
376386

377387
if (sk != skb->sk)
378388
sock_gen_put(sk);

0 commit comments

Comments
 (0)