Skip to content

Commit 795bb1c

Browse files
netoptimizer authored and davem330 committed
net: bulk free infrastructure for NAPI context, use napi_consume_skb
Discovered that network stack were hitting the kmem_cache/SLUB slowpath when freeing SKBs. Doing bulk free with kmem_cache_free_bulk can speedup this slowpath. NAPI context is a bit special, lets take advantage of that for bulk free'ing SKBs. In NAPI context we are running in softirq, which gives us certain protection. A softirq can run on several CPUs at once. BUT the important part is a softirq will never preempt another softirq running on the same CPU. This gives us the opportunity to access per-cpu variables in softirq context. Extend napi_alloc_cache (before only contained page_frag_cache) to be a struct with a small array based stack for holding SKBs. Introduce a SKB defer and flush API for accessing this. Introduce napi_consume_skb() as replacement for e.g. dev_consume_skb_any() when running in NAPI context. A small trick to handle/detect if we are called from netpoll is to see if budget is 0. In that case, we need to invoke dev_consume_skb_irq(). Joint work with Alexander Duyck. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 18ac559 commit 795bb1c

File tree

3 files changed

+81
-6
lines changed

3 files changed

+81
-6
lines changed

include/linux/skbuff.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,6 +2404,9 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
24042404
{
24052405
return __napi_alloc_skb(napi, length, GFP_ATOMIC);
24062406
}
2407+
void napi_consume_skb(struct sk_buff *skb, int budget);
2408+
2409+
void __kfree_skb_flush(void);
24072410

24082411
/**
24092412
* __dev_alloc_pages - allocate page for network Rx

net/core/dev.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5155,6 +5155,7 @@ static void net_rx_action(struct softirq_action *h)
51555155
}
51565156
}
51575157

5158+
__kfree_skb_flush();
51585159
local_irq_disable();
51595160

51605161
list_splice_tail_init(&sd->poll_list, &list);

net/core/skbuff.c

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
347347
}
348348
EXPORT_SYMBOL(build_skb);
349349

350+
#define NAPI_SKB_CACHE_SIZE 64
351+
352+
struct napi_alloc_cache {
353+
struct page_frag_cache page;
354+
size_t skb_count;
355+
void *skb_cache[NAPI_SKB_CACHE_SIZE];
356+
};
357+
350358
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
351-
static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
359+
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
352360

353361
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
354362
{
@@ -378,9 +386,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
378386

379387
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
380388
{
381-
struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
389+
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
382390

383-
return __alloc_page_frag(nc, fragsz, gfp_mask);
391+
return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
384392
}
385393

386394
void *napi_alloc_frag(unsigned int fragsz)
@@ -474,7 +482,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
474482
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
475483
gfp_t gfp_mask)
476484
{
477-
struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
485+
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
478486
struct sk_buff *skb;
479487
void *data;
480488

@@ -494,7 +502,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
494502
if (sk_memalloc_socks())
495503
gfp_mask |= __GFP_MEMALLOC;
496504

497-
data = __alloc_page_frag(nc, len, gfp_mask);
505+
data = __alloc_page_frag(&nc->page, len, gfp_mask);
498506
if (unlikely(!data))
499507
return NULL;
500508

@@ -505,7 +513,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
505513
}
506514

507515
/* use OR instead of assignment to avoid clearing of bits in mask */
508-
if (nc->pfmemalloc)
516+
if (nc->page.pfmemalloc)
509517
skb->pfmemalloc = 1;
510518
skb->head_frag = 1;
511519

@@ -747,6 +755,69 @@ void consume_skb(struct sk_buff *skb)
747755
}
748756
EXPORT_SYMBOL(consume_skb);
749757

758+
void __kfree_skb_flush(void)
759+
{
760+
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
761+
762+
/* flush skb_cache if containing objects */
763+
if (nc->skb_count) {
764+
kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
765+
nc->skb_cache);
766+
nc->skb_count = 0;
767+
}
768+
}
769+
770+
static void __kfree_skb_defer(struct sk_buff *skb)
771+
{
772+
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
773+
774+
/* drop skb->head and call any destructors for packet */
775+
skb_release_all(skb);
776+
777+
/* record skb to CPU local list */
778+
nc->skb_cache[nc->skb_count++] = skb;
779+
780+
#ifdef CONFIG_SLUB
781+
/* SLUB writes into objects when freeing */
782+
prefetchw(skb);
783+
#endif
784+
785+
/* flush skb_cache if it is filled */
786+
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
787+
kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
788+
nc->skb_cache);
789+
nc->skb_count = 0;
790+
}
791+
}
792+
793+
void napi_consume_skb(struct sk_buff *skb, int budget)
794+
{
795+
if (unlikely(!skb))
796+
return;
797+
798+
/* if budget is 0 assume netpoll w/ IRQs disabled */
799+
if (unlikely(!budget)) {
800+
dev_consume_skb_irq(skb);
801+
return;
802+
}
803+
804+
if (likely(atomic_read(&skb->users) == 1))
805+
smp_rmb();
806+
else if (likely(!atomic_dec_and_test(&skb->users)))
807+
return;
808+
/* if reaching here SKB is ready to free */
809+
trace_consume_skb(skb);
810+
811+
/* if SKB is a clone, don't handle this case */
812+
if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
813+
__kfree_skb(skb);
814+
return;
815+
}
816+
817+
__kfree_skb_defer(skb);
818+
}
819+
EXPORT_SYMBOL(napi_consume_skb);
820+
750821
/* Make sure a field is enclosed inside headers_start/headers_end section */
751822
#define CHECK_SKB_FIELD(field) \
752823
BUILD_BUG_ON(offsetof(struct sk_buff, field) < \

0 commit comments

Comments
 (0)