Skip to content

Commit a8b690f

Browse files
Tom Herbert authored and davem330 committed
tcp: Fix slowness in read /proc/net/tcp
This patch addresses a serious performance issue in reading the TCP sockets table (/proc/net/tcp). Reading the full table is done by a number of sequential read operations. At each read operation, a seek is done to find the last socket that was previously read. This seek operation requires that the sockets in the table be counted up to the current file position, and counting each of these requires taking a lock for each non-empty bucket. The whole algorithm is O(n^2). The fix is to cache the last bucket value, the offset within the bucket, and the file position returned by the last read operation. On the next sequential read, the bucket and offset are used to find the last read socket immediately without needing to scan the previous buckets in the table. This algorithm to read the whole table is O(n). The improvement offered by this patch is easily shown by cat'ing /proc/net/tcp on a machine with a lot of connections. With about 182K connections in the table, I see the following: - Without patch time cat /proc/net/tcp > /dev/null real 1m56.729s user 0m0.214s sys 1m56.344s - With patch time cat /proc/net/tcp > /dev/null real 0m0.894s user 0m0.290s sys 0m0.594s Signed-off-by: Tom Herbert <therbert@google.com> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 83038a2 commit a8b690f

File tree

2 files changed

+86
-9
lines changed

2 files changed

+86
-9
lines changed

include/net/tcp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1413,7 +1413,8 @@ struct tcp_iter_state {
14131413
sa_family_t family;
14141414
enum tcp_seq_states state;
14151415
struct sock *syn_wait_sk;
1416-
int bucket, sbucket, num, uid;
1416+
int bucket, offset, sbucket, num, uid;
1417+
loff_t last_pos;
14171418
};
14181419

14191420
extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);

net/ipv4/tcp_ipv4.c

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,6 +1980,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
19801980
hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
19811981
}
19821982

1983+
/*
1984+
* Get next listener socket following cur. If cur is NULL, get first socket
1985+
* starting from bucket given in st->bucket; when st->bucket is zero the
1986+
* very first socket in the hash table is returned.
1987+
*/
19831988
static void *listening_get_next(struct seq_file *seq, void *cur)
19841989
{
19851990
struct inet_connection_sock *icsk;
@@ -1990,14 +1995,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
19901995
struct net *net = seq_file_net(seq);
19911996

19921997
if (!sk) {
1993-
st->bucket = 0;
1994-
ilb = &tcp_hashinfo.listening_hash[0];
1998+
ilb = &tcp_hashinfo.listening_hash[st->bucket];
19951999
spin_lock_bh(&ilb->lock);
19962000
sk = sk_nulls_head(&ilb->head);
2001+
st->offset = 0;
19972002
goto get_sk;
19982003
}
19992004
ilb = &tcp_hashinfo.listening_hash[st->bucket];
20002005
++st->num;
2006+
++st->offset;
20012007

20022008
if (st->state == TCP_SEQ_STATE_OPENREQ) {
20032009
struct request_sock *req = cur;
@@ -2012,6 +2018,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
20122018
}
20132019
req = req->dl_next;
20142020
}
2021+
st->offset = 0;
20152022
if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
20162023
break;
20172024
get_req:
@@ -2047,6 +2054,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
20472054
read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20482055
}
20492056
spin_unlock_bh(&ilb->lock);
2057+
st->offset = 0;
20502058
if (++st->bucket < INET_LHTABLE_SIZE) {
20512059
ilb = &tcp_hashinfo.listening_hash[st->bucket];
20522060
spin_lock_bh(&ilb->lock);
@@ -2060,7 +2068,12 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
20602068

20612069
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
20622070
{
2063-
void *rc = listening_get_next(seq, NULL);
2071+
struct tcp_iter_state *st = seq->private;
2072+
void *rc;
2073+
2074+
st->bucket = 0;
2075+
st->offset = 0;
2076+
rc = listening_get_next(seq, NULL);
20642077

20652078
while (rc && *pos) {
20662079
rc = listening_get_next(seq, rc);
@@ -2075,13 +2088,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
20752088
hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
20762089
}
20772090

2091+
/*
2092+
* Get first established socket starting from bucket given in st->bucket.
2093+
* If st->bucket is zero, the very first socket in the hash is returned.
2094+
*/
20782095
static void *established_get_first(struct seq_file *seq)
20792096
{
20802097
struct tcp_iter_state *st = seq->private;
20812098
struct net *net = seq_file_net(seq);
20822099
void *rc = NULL;
20832100

2084-
for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2101+
st->offset = 0;
2102+
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
20852103
struct sock *sk;
20862104
struct hlist_nulls_node *node;
20872105
struct inet_timewait_sock *tw;
@@ -2126,6 +2144,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
21262144
struct net *net = seq_file_net(seq);
21272145

21282146
++st->num;
2147+
++st->offset;
21292148

21302149
if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
21312150
tw = cur;
@@ -2142,6 +2161,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
21422161
st->state = TCP_SEQ_STATE_ESTABLISHED;
21432162

21442163
/* Look for next non empty bucket */
2164+
st->offset = 0;
21452165
while (++st->bucket <= tcp_hashinfo.ehash_mask &&
21462166
empty_bucket(st))
21472167
;
@@ -2169,7 +2189,11 @@ static void *established_get_next(struct seq_file *seq, void *cur)
21692189

21702190
static void *established_get_idx(struct seq_file *seq, loff_t pos)
21712191
{
2172-
void *rc = established_get_first(seq);
2192+
struct tcp_iter_state *st = seq->private;
2193+
void *rc;
2194+
2195+
st->bucket = 0;
2196+
rc = established_get_first(seq);
21732197

21742198
while (rc && pos) {
21752199
rc = established_get_next(seq, rc);
@@ -2194,31 +2218,81 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
21942218
return rc;
21952219
}
21962220

2221+
static void *tcp_seek_last_pos(struct seq_file *seq)
2222+
{
2223+
struct tcp_iter_state *st = seq->private;
2224+
int offset = st->offset;
2225+
int orig_num = st->num;
2226+
void *rc = NULL;
2227+
2228+
switch (st->state) {
2229+
case TCP_SEQ_STATE_OPENREQ:
2230+
case TCP_SEQ_STATE_LISTENING:
2231+
if (st->bucket >= INET_LHTABLE_SIZE)
2232+
break;
2233+
st->state = TCP_SEQ_STATE_LISTENING;
2234+
rc = listening_get_next(seq, NULL);
2235+
while (offset-- && rc)
2236+
rc = listening_get_next(seq, rc);
2237+
if (rc)
2238+
break;
2239+
st->bucket = 0;
2240+
/* Fallthrough */
2241+
case TCP_SEQ_STATE_ESTABLISHED:
2242+
case TCP_SEQ_STATE_TIME_WAIT:
2243+
st->state = TCP_SEQ_STATE_ESTABLISHED;
2244+
if (st->bucket > tcp_hashinfo.ehash_mask)
2245+
break;
2246+
rc = established_get_first(seq);
2247+
while (offset-- && rc)
2248+
rc = established_get_next(seq, rc);
2249+
}
2250+
2251+
st->num = orig_num;
2252+
2253+
return rc;
2254+
}
2255+
21972256
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
21982257
{
21992258
struct tcp_iter_state *st = seq->private;
2259+
void *rc;
2260+
2261+
if (*pos && *pos == st->last_pos) {
2262+
rc = tcp_seek_last_pos(seq);
2263+
if (rc)
2264+
goto out;
2265+
}
2266+
22002267
st->state = TCP_SEQ_STATE_LISTENING;
22012268
st->num = 0;
2202-
return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2269+
st->bucket = 0;
2270+
st->offset = 0;
2271+
rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2272+
2273+
out:
2274+
st->last_pos = *pos;
2275+
return rc;
22032276
}
22042277

22052278
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
22062279
{
2280+
struct tcp_iter_state *st = seq->private;
22072281
void *rc = NULL;
2208-
struct tcp_iter_state *st;
22092282

22102283
if (v == SEQ_START_TOKEN) {
22112284
rc = tcp_get_idx(seq, 0);
22122285
goto out;
22132286
}
2214-
st = seq->private;
22152287

22162288
switch (st->state) {
22172289
case TCP_SEQ_STATE_OPENREQ:
22182290
case TCP_SEQ_STATE_LISTENING:
22192291
rc = listening_get_next(seq, v);
22202292
if (!rc) {
22212293
st->state = TCP_SEQ_STATE_ESTABLISHED;
2294+
st->bucket = 0;
2295+
st->offset = 0;
22222296
rc = established_get_first(seq);
22232297
}
22242298
break;
@@ -2229,6 +2303,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
22292303
}
22302304
out:
22312305
++*pos;
2306+
st->last_pos = *pos;
22322307
return rc;
22332308
}
22342309

@@ -2267,6 +2342,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
22672342

22682343
s = ((struct seq_file *)file->private_data)->private;
22692344
s->family = afinfo->family;
2345+
s->last_pos = 0;
22702346
return 0;
22712347
}
22722348

0 commit comments

Comments
 (0)