Skip to content

Commit 4a17fc3

Browse files
ij1davem330
authored andcommitted
tcp: collapse more than two on retransmission
I always had thought that collapsing up to two at a time was intentional decision to avoid excessive processing if 1 byte sized skbs are to be combined for a full mtu, and consecutive retransmissions would make the size of the retransmittee double each round anyway, but some recent discussion made me to understand that was not the case. Thus make collapse work more and wait less. It would be possible to take advantage of the shifting machinery (added in the later patch) in the case of paged data but that can be implemented on top of this change. tcp_skb_is_last check is now provided by the loop. I tested a bit (ss-after-idle-off, fill 4096x4096B xfer, 10s sleep + 4096 x 1byte writes while dropping them for some a while with netem): . 16774097:16775545(1448) ack 1 win 46 . 16775545:16776993(1448) ack 1 win 46 . ack 16759617 win 2399 P 16776993:16777217(224) ack 1 win 46 . ack 16762513 win 2399 . ack 16765409 win 2399 . ack 16768305 win 2399 . ack 16771201 win 2399 . ack 16774097 win 2399 . ack 16776993 win 2399 . ack 16777217 win 2399 P 16777217:16777257(40) ack 1 win 46 . ack 16777257 win 2399 P 16777257:16778705(1448) ack 1 win 46 P 16778705:16780153(1448) ack 1 win 46 FP 16780153:16781313(1160) ack 1 win 46 . ack 16778705 win 2399 . ack 16780153 win 2399 F 1:1(0) ack 16781314 win 2399 While without drop-all period I get this: . 16773585:16775033(1448) ack 1 win 46 . ack 16764897 win 9367 . ack 16767793 win 9367 . ack 16770689 win 9367 . ack 16773585 win 9367 . 16775033:16776481(1448) ack 1 win 46 P 16776481:16777217(736) ack 1 win 46 . ack 16776481 win 9367 . ack 16777217 win 9367 P 16777217:16777218(1) ack 1 win 46 P 16777218:16777219(1) ack 1 win 46 P 16777219:16777220(1) ack 1 win 46 ... P 16777247:16777248(1) ack 1 win 46 . ack 16777218 win 9367 . ack 16777219 win 9367 ... . ack 16777233 win 9367 . ack 16777248 win 9367 P 16777248:16778696(1448) ack 1 win 46 P 16778696:16780144(1448) ack 1 win 46 FP 16780144:16781313(1169) ack 1 win 46 . ack 16780144 win 9367 F 1:1(0) ack 16781314 win 9367 The window seems to be 30-40 segments, which were successfully combined into: P 16777217:16777257(40) ack 1 win 46 Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent a21bba9 commit 4a17fc3

File tree

1 file changed

+59
-37
lines changed

1 file changed

+59
-37
lines changed

net/ipv4/tcp_output.c

Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,46 +1766,22 @@ u32 __tcp_select_window(struct sock *sk)
17661766
return window;
17671767
}
17681768

1769-
/* Attempt to collapse two adjacent SKB's during retransmission. */
1770-
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
1771-
int mss_now)
1769+
/* Collapses two adjacent SKB's during retransmission. */
1770+
static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
17721771
{
17731772
struct tcp_sock *tp = tcp_sk(sk);
17741773
struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
17751774
int skb_size, next_skb_size;
17761775
u16 flags;
17771776

1778-
/* The first test we must make is that neither of these two
1779-
* SKB's are still referenced by someone else.
1780-
*/
1781-
if (skb_cloned(skb) || skb_cloned(next_skb))
1782-
return;
1783-
17841777
skb_size = skb->len;
17851778
next_skb_size = next_skb->len;
17861779
flags = TCP_SKB_CB(skb)->flags;
17871780

1788-
/* Also punt if next skb has been SACK'd. */
1789-
if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
1790-
return;
1791-
1792-
/* Next skb is out of window. */
1793-
if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
1794-
return;
1795-
1796-
/* Punt if not enough space exists in the first SKB for
1797-
* the data in the second, or the total combined payload
1798-
* would exceed the MSS.
1799-
*/
1800-
if ((next_skb_size > skb_tailroom(skb)) ||
1801-
((skb_size + next_skb_size) > mss_now))
1802-
return;
1803-
18041781
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
18051782

18061783
tcp_highest_sack_combine(sk, next_skb, skb);
18071784

1808-
/* Ok. We will be able to collapse the packet. */
18091785
tcp_unlink_write_queue(next_skb, sk);
18101786

18111787
skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
@@ -1847,6 +1823,62 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
18471823
sk_wmem_free_skb(sk, next_skb);
18481824
}
18491825

1826+
static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
1827+
{
1828+
if (tcp_skb_pcount(skb) > 1)
1829+
return 0;
1830+
/* TODO: SACK collapsing could be used to remove this condition */
1831+
if (skb_shinfo(skb)->nr_frags != 0)
1832+
return 0;
1833+
if (skb_cloned(skb))
1834+
return 0;
1835+
if (skb == tcp_send_head(sk))
1836+
return 0;
1837+
/* Some heurestics for collapsing over SACK'd could be invented */
1838+
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1839+
return 0;
1840+
1841+
return 1;
1842+
}
1843+
1844+
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
1845+
int space)
1846+
{
1847+
struct tcp_sock *tp = tcp_sk(sk);
1848+
struct sk_buff *skb = to, *tmp;
1849+
int first = 1;
1850+
1851+
if (!sysctl_tcp_retrans_collapse)
1852+
return;
1853+
if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
1854+
return;
1855+
1856+
tcp_for_write_queue_from_safe(skb, tmp, sk) {
1857+
if (!tcp_can_collapse(sk, skb))
1858+
break;
1859+
1860+
space -= skb->len;
1861+
1862+
if (first) {
1863+
first = 0;
1864+
continue;
1865+
}
1866+
1867+
if (space < 0)
1868+
break;
1869+
/* Punt if not enough space exists in the first SKB for
1870+
* the data in the second
1871+
*/
1872+
if (skb->len > skb_tailroom(to))
1873+
break;
1874+
1875+
if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
1876+
break;
1877+
1878+
tcp_collapse_retrans(sk, to);
1879+
}
1880+
}
1881+
18501882
/* Do a simple retransmit without using the backoff mechanisms in
18511883
* tcp_timer. This is used for path mtu discovery.
18521884
* The socket is already locked here.
@@ -1946,17 +1978,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
19461978
return -ENOMEM; /* We'll try again later. */
19471979
}
19481980

1949-
/* Collapse two adjacent packets if worthwhile and we can. */
1950-
if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
1951-
(skb->len < (cur_mss >> 1)) &&
1952-
(!tcp_skb_is_last(sk, skb)) &&
1953-
(tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
1954-
(skb_shinfo(skb)->nr_frags == 0 &&
1955-
skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
1956-
(tcp_skb_pcount(skb) == 1 &&
1957-
tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
1958-
(sysctl_tcp_retrans_collapse != 0))
1959-
tcp_retrans_try_collapse(sk, skb, cur_mss);
1981+
tcp_retrans_try_collapse(sk, skb, cur_mss);
19601982

19611983
/* Some Solaris stacks overoptimize and ignore the FIN on a
19621984
* retransmit when old data is attached. So strip it off

0 commit comments

Comments
 (0)