
Commit a0370b3

yuchungcheng authored and davem330 committed
tcp: enable RACK loss detection to trigger recovery
This patch changes two things:

1. Start fast recovery with RACK in addition to other heuristics (e.g.,
   DUPACK threshold, FACK). Prior to this change, RACK is enabled to detect
   losses only after recovery has been started by other algorithms.

2. Disable TCP early retransmit. RACK subsumes early retransmit with the
   new reordering timer feature. A later patch in this series removes the
   early retransmit code.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 98e36d4 commit a0370b3
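The behaviour this patch enables by default is controlled through the net.ipv4.tcp_recovery sysctl bitmask, whose 0x1 bit is the TCP_RACK_LOSS_DETECTION flag defined in the include/net/tcp.h hunk below. As a small usage illustration (not part of the commit), a user-space program on a kernel with this patch could check whether RACK loss detection is enabled like this:

/* Read net.ipv4.tcp_recovery via procfs and test the RACK bit (0x1).
 * Illustrative sketch only; error handling is kept minimal.
 */
#include <stdio.h>
#include <stdlib.h>

#define TCP_RACK_LOSS_DETECTION 0x1	/* same bit as in include/net/tcp.h */

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_recovery", "r");
	int val = 0;

	if (!f) {
		perror("tcp_recovery");
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%d", &val) != 1)
		val = 0;
	fclose(f);

	printf("RACK loss detection: %s (tcp_recovery=0x%x)\n",
	       (val & TCP_RACK_LOSS_DETECTION) ? "enabled" : "disabled", val);
	return 0;
}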

File tree

include/net/tcp.h
net/ipv4/tcp_input.c
net/ipv4/tcp_recovery.c

3 files changed: +35, -21 lines


include/net/tcp.h

Lines changed: 4 additions & 7 deletions
@@ -262,6 +262,9 @@ extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_thin_linear_timeouts;
 extern int sysctl_tcp_thin_dupack;
 extern int sysctl_tcp_early_retrans;
+extern int sysctl_tcp_recovery;
+#define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */
+
 extern int sysctl_tcp_limit_output_bytes;
 extern int sysctl_tcp_challenge_ack_limit;
 extern int sysctl_tcp_min_tso_segs;
@@ -1043,6 +1046,7 @@ static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
 
 	tp->do_early_retrans = sysctl_tcp_early_retrans &&
 			       sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
+			       !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
 			       net->ipv4.sysctl_tcp_reordering == 3;
 }
 
@@ -1859,13 +1863,6 @@ void tcp_v4_init(void);
 void tcp_init(void);
 
 /* tcp_recovery.c */
-
-/* Flags to enable various loss recovery features. See below */
-extern int sysctl_tcp_recovery;
-
-/* Use TCP RACK to detect (some) tail and retransmit losses */
-#define TCP_RACK_LOST_RETRANS 0x1
-
 extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now);
 extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 			     const struct skb_mstamp *xmit_time,
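The practical effect of the tcp_enable_early_retrans() change above is easiest to see with the defaults plugged in. The following stand-alone, user-space toy model (not kernel code; the sysctl_* locals merely mimic the kernel defaults at the time of this commit) shows that the new !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) term forces do_early_retrans to 0 once RACK loss detection is on, which is how the patch disables early retransmit:

#include <stdio.h>

#define TCP_RACK_LOSS_DETECTION 0x1

int main(void)
{
	/* Locals mimicking the relevant kernel sysctl defaults. */
	int sysctl_tcp_early_retrans = 3;
	int sysctl_tcp_thin_dupack = 0;
	int sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;	/* new default */
	int sysctl_tcp_reordering = 3;

	/* Same boolean expression as tcp_enable_early_retrans() after the patch. */
	int do_early_retrans = sysctl_tcp_early_retrans &&
			       sysctl_tcp_early_retrans < 4 &&
			       !sysctl_tcp_thin_dupack &&
			       !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
			       sysctl_tcp_reordering == 3;

	printf("do_early_retrans = %d\n", do_early_retrans);	/* prints 0 */
	return 0;
}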

net/ipv4/tcp_input.c

Lines changed: 21 additions & 8 deletions
@@ -2129,10 +2129,25 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
  * F.e. after RTO, when all the queue is considered as lost,
  * lost_out = packets_out and in_flight = retrans_out.
  *
- * Essentially, we have now two algorithms counting
+ * Essentially, we have now a few algorithms detecting
  * lost packets.
  *
- * FACK: It is the simplest heuristics. As soon as we decided
+ * If the receiver supports SACK:
+ *
+ * RFC6675/3517: It is the conventional algorithm. A packet is
+ * considered lost if the number of higher sequence packets
+ * SACKed is greater than or equal the DUPACK threshold
+ * (reordering). This is implemented in tcp_mark_head_lost and
+ * tcp_update_scoreboard.
+ *
+ * RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm
+ * (2017-) that checks timing instead of counting DUPACKs.
+ * Essentially a packet is considered lost if it's not S/ACKed
+ * after RTT + reordering_window, where both metrics are
+ * dynamically measured and adjusted. This is implemented in
+ * tcp_rack_mark_lost.
+ *
+ * FACK: it is the simplest heuristics. As soon as we decided
  * that something is lost, we decide that _all_ not SACKed
  * packets until the most forward SACK are lost. I.e.
  * lost_out = fackets_out - sacked_out and left_out = fackets_out.
@@ -2141,16 +2156,14 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
  * takes place. We use FACK by default until reordering
  * is suspected on the path to this destination.
  *
- * NewReno: when Recovery is entered, we assume that one segment
+ * If the receiver does not support SACK:
+ *
+ * NewReno (RFC6582): in Recovery we assume that one segment
  * is lost (classic Reno). While we are in Recovery and
  * a partial ACK arrives, we assume that one more packet
  * is lost (NewReno). This heuristics are the same in NewReno
  * and SACK.
  *
- * Imagine, that's all! Forget about all this shamanism about CWND inflation
- * deflation etc. CWND is real congestion window, never inflated, changes
- * only according to classic VJ rules.
- *
  * Really tricky (and requiring careful tuning) part of algorithm
  * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
  * The first determines the moment _when_ we should reduce CWND and,
@@ -2807,7 +2820,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Use RACK to detect loss */
-	if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) {
+	if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
 		u32 prior_retrans = tp->retrans_out;
 
 		tcp_rack_mark_lost(sk, ack_time);
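The comment block added above describes RACK only in prose: a packet is considered lost once it has gone unacknowledged for more than RTT + reordering_window while some later-sent packet has already been S/ACKed. A minimal, self-contained sketch of that timing rule follows; the struct layout and function names are invented for clarity and are not the kernel's (the real logic lives in tcp_rack_detect_loss() in net/ipv4/tcp_recovery.c):

/* Illustrative sketch of the RACK timing rule; field and function names
 * are made up and do not match the kernel's skb/tcp_sock layout.
 */
#include <stdbool.h>
#include <stdint.h>

struct pkt {
	uint64_t xmit_time_us;	/* when this packet was (re)transmitted */
	uint32_t end_seq;	/* its ending sequence number */
};

struct rack {
	uint64_t mstamp_us;	/* xmit time of the most recently (s)acked pkt */
	uint32_t end_seq;	/* ending sequence number of that packet */
	uint32_t rtt_us;	/* RTT measured on the most recently (s)acked pkt */
	uint32_t reo_wnd_us;	/* allowed reordering window ("settling delay") */
};

/* RACK's ordering test: was the (s)acked reference packet sent after @p?
 * Ties on the transmit timestamp are broken by sequence number.
 */
bool rack_sent_after(const struct rack *r, const struct pkt *p)
{
	return r->mstamp_us > p->xmit_time_us ||
	       (r->mstamp_us == p->xmit_time_us && r->end_seq > p->end_seq);
}

/* A still-outstanding packet is deemed lost at @now_us once a later-sent
 * packet has been (s)acked and more than RTT + reordering window has
 * elapsed since the packet's own transmission.
 */
bool rack_pkt_lost(const struct rack *r, const struct pkt *p, uint64_t now_us)
{
	return rack_sent_after(r, p) &&
	       now_us - p->xmit_time_us > (uint64_t)r->rtt_us + r->reo_wnd_us;
}

Because the decision depends on elapsed time rather than on how many DUPACKs have arrived, the same rule covers the cases that the DUPACK threshold and early retransmit were each designed for.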

net/ipv4/tcp_recovery.c

Lines changed: 10 additions & 6 deletions
@@ -1,7 +1,7 @@
 #include <linux/tcp.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS;
+int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
 
 static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 {
@@ -24,7 +24,9 @@ static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
 	       (t1->v64 == t2->v64 && after(seq1, seq2));
 }
 
-/* Marks a packet lost, if some packet sent later has been (s)acked.
+/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
+ *
+ * Marks a packet lost, if some packet sent later has been (s)acked.
  * The underlying idea is similar to the traditional dupthresh and FACK
  * but they look at different metrics:
  *
@@ -37,8 +39,10 @@ static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
  * is being more resilient to reordering by simply allowing some
  * "settling delay", instead of tweaking the dupthresh.
  *
- * The current version is only used after recovery starts but can be
- * easily extended to detect the first loss.
+ * When tcp_rack_detect_loss() detects some packets are lost and we
+ * are not already in the CA_Recovery state, either tcp_rack_reo_timeout()
+ * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will
+ * make us enter the CA_Recovery state.
  */
 static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
 				 u32 *reo_timeout)
@@ -54,7 +58,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
 	 * to queuing or delayed ACKs.
 	 */
 	reo_wnd = 1000;
-	if (tp->rack.reord && tcp_min_rtt(tp) != ~0U)
+	if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
 		reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
 
 	tcp_for_write_queue(skb, sk) {
@@ -105,7 +109,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout;
 
-	if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
+	if (!tp->rack.advanced)
 		return;
 
 	/* Reset the advanced flag to avoid unnecessary queue scanning */
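Two changes in the tcp_recovery.c hunks are what let RACK fire before a recovery episode exists: tcp_rack_mark_lost() no longer returns early when the connection is below the TCP_CA_Recovery state, and the larger min_rtt/4 reordering window now also applies when nothing has been marked lost yet (the new !tp->lost_out term), not only after reordering has been observed. A simplified stand-alone model of that window computation follows; units are microseconds and rack_reo_wnd_us is a made-up helper, not the kernel function:

#include <stdint.h>

/* Simplified model of the reordering-window choice in
 * tcp_rack_detect_loss() after this patch.
 */
uint32_t rack_reo_wnd_us(uint32_t min_rtt_us, int reord_seen, uint32_t lost_out)
{
	uint32_t reo_wnd = 1000;	/* default settling delay: 1 ms */

	/* Use min_rtt/4 if it is larger, either when reordering has been
	 * observed or - new in this patch - when nothing has been marked
	 * lost yet, provided a valid min RTT sample exists (~0U means
	 * "no sample").
	 */
	if ((reord_seen || !lost_out) && min_rtt_us != ~0U &&
	    (min_rtt_us >> 2) > reo_wnd)
		reo_wnd = min_rtt_us >> 2;

	return reo_wnd;
}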
