Skip to content

Commit 043b87d

Browse files
yuchungcheng authored and davem330 committed
tcp: more efficient RACK loss detection
Use the new time-ordered list to speed up RACK. The detection logic is identical. But since the list is chronologically ordered by skb_mstamp and contains only skbs not yet acked or sacked, RACK can abort the loop upon hitting skbs that were sent more recently. On YouTube servers this patch reduces the iterations on write queue by 40x. The improvement is even bigger with large BDP networks. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent e208007 commit 043b87d

File tree

1 file changed

+5
-15
lines changed

1 file changed

+5
-15
lines changed

net/ipv4/tcp_recovery.c

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
4545
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
4646
{
4747
struct tcp_sock *tp = tcp_sk(sk);
48-
struct sk_buff *skb;
48+
struct sk_buff *skb, *n;
4949
u32 reo_wnd;
5050

5151
*reo_timeout = 0;
@@ -58,17 +58,10 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
5858
if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
5959
reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
6060

61-
tcp_for_write_queue(skb, sk) {
61+
list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
62+
tcp_tsorted_anchor) {
6263
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
6364

64-
if (skb == tcp_send_head(sk))
65-
break;
66-
67-
/* Skip ones already (s)acked */
68-
if (!after(scb->end_seq, tp->snd_una) ||
69-
scb->sacked & TCPCB_SACKED_ACKED)
70-
continue;
71-
7265
if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
7366
tp->rack.end_seq, scb->end_seq)) {
7467
/* Step 3 in draft-cheng-tcpm-rack-00.txt:
@@ -81,6 +74,7 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
8174

8275
if (remaining < 0) {
8376
tcp_rack_mark_skb_lost(sk, skb);
77+
list_del_init(&skb->tcp_tsorted_anchor);
8478
continue;
8579
}
8680

@@ -91,11 +85,7 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
9185

9286
/* Record maximum wait time (+1 to avoid 0) */
9387
*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
94-
95-
} else if (!(scb->sacked & TCPCB_RETRANS)) {
96-
/* Original data are sent sequentially so stop early
97-
* b/c the rest are all sent after rack_sent
98-
*/
88+
} else {
9989
break;
10090
}
10191
}

0 commit comments

Comments (0)