
Commit d66280b

Peter Oskolkov authored and davem330 committed
net: netem: use a list in addition to rbtree
When testing high-bandwidth TCP streams with large windows, high latency, and low jitter, netem consumes a lot of CPU cycles doing rbtree rebalancing.

This patch uses a linear list/queue in addition to the rbtree: if an incoming packet is past the tail of the linear queue, it is added there, otherwise it is inserted into the rbtree.

Without this patch, perf shows netem_enqueue, netem_dequeue, and rb_* functions among the top offenders. With this patch, only netem_enqueue is noticeable if jitter is low/absent.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 932c441 commit d66280b
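The data-structure change described in the commit message can be sketched in isolation. The following is a minimal user-space C illustration of the enqueue fast path, not the kernel code: the pkt and tfifo types are hypothetical stand-ins for struct sk_buff and struct netem_sched_data, and the rbtree fallback for out-of-order packets is left as a stub.

/*
 * Sketch of the fast path: a packet whose send time is not earlier than the
 * current tail is appended in O(1); anything else must go to the rbtree.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct pkt {
	uint64_t time_to_send;
	struct pkt *next;		/* linear-queue link */
};

struct tfifo {
	struct pkt *t_head;		/* linear queue, ordered by time_to_send */
	struct pkt *t_tail;
	/* rbtree of out-of-order packets omitted in this sketch */
};

/* Returns false when the caller must fall back to an rbtree insert. */
bool tfifo_enqueue_fast(struct tfifo *q, struct pkt *p)
{
	if (q->t_tail && p->time_to_send < q->t_tail->time_to_send)
		return false;		/* out of order */

	p->next = NULL;
	if (q->t_tail)
		q->t_tail->next = p;
	else
		q->t_head = p;
	q->t_tail = p;
	return true;
}

With low or zero jitter, time_to_send is effectively monotonic, so nearly every packet takes this O(1) append and the rbtree stays small, which is why the rb_* rebalancing cost disappears from the perf profile.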

File tree

1 file changed: +69 -20 lines changed

net/sched/sch_netem.c

Lines changed: 69 additions & 20 deletions
@@ -77,6 +77,10 @@ struct netem_sched_data {
 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
 	struct rb_root t_root;
 
+	/* a linear queue; reduces rbtree rebalancing when jitter is low */
+	struct sk_buff	*t_head;
+	struct sk_buff	*t_tail;
+
 	/* optional qdisc for classful handling (NULL at netem init) */
 	struct Qdisc	*qdisc;
 
@@ -369,26 +373,39 @@ static void tfifo_reset(struct Qdisc *sch)
 		rb_erase(&skb->rbnode, &q->t_root);
 		rtnl_kfree_skbs(skb, skb);
 	}
+
+	rtnl_kfree_skbs(q->t_head, q->t_tail);
+	q->t_head = NULL;
+	q->t_tail = NULL;
 }
 
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
-	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
 
-	while (*p) {
-		struct sk_buff *skb;
-
-		parent = *p;
-		skb = rb_to_skb(parent);
-		if (tnext >= netem_skb_cb(skb)->time_to_send)
-			p = &parent->rb_right;
+	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
+		if (q->t_tail)
+			q->t_tail->next = nskb;
 		else
-			p = &parent->rb_left;
+			q->t_head = nskb;
+		q->t_tail = nskb;
+	} else {
+		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
+
+		while (*p) {
+			struct sk_buff *skb;
+
+			parent = *p;
+			skb = rb_to_skb(parent);
+			if (tnext >= netem_skb_cb(skb)->time_to_send)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&nskb->rbnode, parent, p);
+		rb_insert_color(&nskb->rbnode, &q->t_root);
 	}
-	rb_link_node(&nskb->rbnode, parent, p);
-	rb_insert_color(&nskb->rbnode, &q->t_root);
 	sch->q.qlen++;
 }
 
@@ -530,9 +547,16 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 				t_skb = skb_rb_last(&q->t_root);
 				t_last = netem_skb_cb(t_skb);
 				if (!last ||
-				    t_last->time_to_send > last->time_to_send) {
+				    t_last->time_to_send > last->time_to_send)
+					last = t_last;
+			}
+			if (q->t_tail) {
+				struct netem_skb_cb *t_last =
+					netem_skb_cb(q->t_tail);
+
+				if (!last ||
+				    t_last->time_to_send > last->time_to_send)
 					last = t_last;
-				}
 			}
 
 			if (last) {
@@ -611,11 +635,38 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
 	q->slot.bytes_left = q->slot_config.max_bytes;
 }
 
+static struct sk_buff *netem_peek(struct netem_sched_data *q)
+{
+	struct sk_buff *skb = skb_rb_first(&q->t_root);
+	u64 t1, t2;
+
+	if (!skb)
+		return q->t_head;
+	if (!q->t_head)
+		return skb;
+
+	t1 = netem_skb_cb(skb)->time_to_send;
+	t2 = netem_skb_cb(q->t_head)->time_to_send;
+	if (t1 < t2)
+		return skb;
+	return q->t_head;
+}
+
+static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
+{
+	if (skb == q->t_head) {
+		q->t_head = skb->next;
+		if (!q->t_head)
+			q->t_tail = NULL;
+	} else {
+		rb_erase(&skb->rbnode, &q->t_root);
+	}
+}
+
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	struct rb_node *p;
 
 tfifo_dequeue:
 	skb = __qdisc_dequeue_head(&sch->q);
@@ -625,20 +676,18 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 		qdisc_bstats_update(sch, skb);
 		return skb;
 	}
-	p = rb_first(&q->t_root);
-	if (p) {
+	skb = netem_peek(q);
+	if (skb) {
 		u64 time_to_send;
 		u64 now = ktime_get_ns();
 
-		skb = rb_to_skb(p);
-
 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
 			get_slot_next(q, now);
 
-		if (time_to_send <= now && q->slot.slot_next <= now) {
-			rb_erase(p, &q->t_root);
+		if (time_to_send <= now &&  q->slot.slot_next <= now) {
+			netem_erase_head(q, skb);
 			sch->q.qlen--;
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;
