Commit ffde732

Alexander Duyck authored and davem330 committed
net: Split netdev_alloc_frag into __alloc_page_frag and add __napi_alloc_frag
This patch splits the netdev_alloc_frag function up so that it can be used on one of two page frag pools instead of being fixed on the netdev_alloc_cache. By doing this we can add a NAPI-specific function, __napi_alloc_frag, that accesses a pool which is only used from softirq context. The advantage is that we do not need to call local_irq_save/restore, which can be a significant savings.

I also took the opportunity to refactor the core bits that were placed in __alloc_page_frag. First, I updated the allocation to do either a 32K allocation or an order-0 page. This is based on the changes in commit d9b2938, where it was found that latencies could be reduced in case of failures. Then I rewrote the logic to work from the end of the page to the start. By doing this, the size value doesn't have to be used unless we have run out of space for page fragments. Finally, I cleaned up the atomic bits so that we just do an atomic_sub_and_test, and if that returns true we set page->_count via an atomic_set. This way we can remove the extra conditional for the atomic_read, since it would have led to an atomic_inc in the case of success anyway.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
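For context, the intended consumer of the new helper is a driver's NAPI poll path, which already runs in softirq context and so can use the per-CPU napi_alloc_cache without the local_irq_save/restore that netdev_alloc_frag still pays. Below is a minimal sketch of such a caller; the ring structure, field names, and refill helper are hypothetical illustrations, not part of this patch.

#include <linux/skbuff.h>

/* Hypothetical per-ring Rx state; real drivers track far more than this. */
struct example_rx_ring {
	unsigned int buf_len;	/* payload bytes the hardware may write */
	void *next_buf;		/* fragment handed to the next descriptor */
};

/* Refill one Rx buffer from a driver's NAPI poll routine (softirq context),
 * so napi_alloc_frag() can draw from the NAPI-only page frag pool directly.
 */
static bool example_rx_refill(struct example_rx_ring *ring)
{
	unsigned int fragsz = SKB_DATA_ALIGN(NET_SKB_PAD + ring->buf_len) +
			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	void *buf = napi_alloc_frag(fragsz);

	if (unlikely(!buf))
		return false;

	/* later, once DMA completes, the driver would wrap this buffer
	 * with build_skb() and skb_reserve(skb, NET_SKB_PAD)
	 */
	ring->next_buf = buf;
	return true;
}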
1 parent 6e5f59a · commit ffde732

2 files changed, +79 -40 lines

include/linux/skbuff.h

Lines changed: 2 additions & 0 deletions

@@ -2164,6 +2164,8 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
+void *napi_alloc_frag(unsigned int fragsz);
+
 /**
  * __dev_alloc_pages - allocate page for network Rx
  * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx

net/core/skbuff.c

Lines changed: 77 additions & 40 deletions

@@ -336,59 +336,85 @@ struct netdev_alloc_cache {
 	unsigned int		pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
 
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
+				       gfp_t gfp_mask)
 {
-	struct netdev_alloc_cache *nc;
-	void *data = NULL;
-	int order;
-	unsigned long flags;
+	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
+	struct page *page = NULL;
+	gfp_t gfp = gfp_mask;
+
+	if (order) {
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+		nc->frag.size = PAGE_SIZE << (page ? order : 0);
+	}
 
-	local_irq_save(flags);
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	if (unlikely(!nc->frag.page)) {
+	if (unlikely(!page))
+		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+	nc->frag.page = page;
+
+	return page;
+}
+
+static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
+			       unsigned int fragsz, gfp_t gfp_mask)
+{
+	struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
+	struct page *page = nc->frag.page;
+	unsigned int size;
+	int offset;
+
+	if (unlikely(!page)) {
 refill:
-		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
-			gfp_t gfp = gfp_mask;
+		page = __page_frag_refill(nc, gfp_mask);
+		if (!page)
+			return NULL;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
 
-			if (order)
-				gfp |= __GFP_COMP | __GFP_NOWARN;
-			nc->frag.page = alloc_pages(gfp, order);
-			if (likely(nc->frag.page))
-				break;
-			if (--order < 0)
-				goto end;
-		}
-		nc->frag.size = PAGE_SIZE << order;
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
-		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
-			   &nc->frag.page->_count);
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+		atomic_add(size - 1, &page->_count);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		nc->frag.offset = size;
 	}
 
-	if (nc->frag.offset + fragsz > nc->frag.size) {
-		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
-			if (!atomic_sub_and_test(nc->pagecnt_bias,
-						 &nc->frag.page->_count))
-				goto refill;
-			/* OK, page count is 0, we can safely set it */
-			atomic_set(&nc->frag.page->_count,
-				   NETDEV_PAGECNT_MAX_BIAS);
-		} else {
-			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
-				   &nc->frag.page->_count);
-		}
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+	offset = nc->frag.offset - fragsz;
+	if (unlikely(offset < 0)) {
+		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+			goto refill;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
+
+		/* OK, page count is 0, we can safely set it */
+		atomic_set(&page->_count, size);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		offset = size - fragsz;
 	}
 
-	data = page_address(nc->frag.page) + nc->frag.offset;
-	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
-end:
+	nc->frag.offset = offset;
+
+	return page_address(page) + offset;
+}
+
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	unsigned long flags;
+	void *data;
+
+	local_irq_save(flags);
+	data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -406,6 +432,17 @@ void *netdev_alloc_frag(unsigned int fragsz)
 }
 EXPORT_SYMBOL(netdev_alloc_frag);
 
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+}
+
+void *napi_alloc_frag(unsigned int fragsz)
+{
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
+EXPORT_SYMBOL(napi_alloc_frag);
+
 /**
  * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
  * @dev: network device to receive on
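To make the new accounting easier to follow with concrete numbers: frag.offset now starts at the end of the page (32768 for a 32K allocation) and each call simply subtracts fragsz, so frag.size is only consulted again once the offset would go negative. The sketch below is a simplified, hypothetical model of that scheme rather than kernel code: the per-CPU handling is dropped and plain integers stand in for page->_count and the atomic operations.

#include <stddef.h>

/* Simplified, single-threaded model of __alloc_page_frag()'s bookkeeping:
 * offset walks from the end of the page toward the start, and bias mirrors
 * the references pre-charged on the page.
 */
struct frag_model {
	unsigned int size;	/* bytes in the backing page, e.g. 32768 */
	unsigned int refcnt;	/* stands in for page->_count */
	unsigned int bias;	/* stands in for nc->pagecnt_bias */
	int offset;		/* stands in for nc->frag.offset */
};

static void frag_model_init(struct frag_model *m, unsigned int size)
{
	/* mirrors the refill path: pre-charge the count, start at the end */
	m->size = size;
	m->refcnt = size;
	m->bias = size;
	m->offset = size;
}

static void *frag_model_alloc(struct frag_model *m, char *page,
			      unsigned int fragsz)
{
	int offset = m->offset - fragsz;

	if (offset < 0) {
		/* mirrors atomic_sub_and_test(): if other users still hold
		 * references, the real code jumps to refill a fresh page
		 */
		m->refcnt -= m->bias;
		if (m->refcnt != 0)
			return NULL;

		/* page is ours again: recharge the count and restart at
		 * the end of the page
		 */
		m->refcnt = m->size;
		m->bias = m->size;
		offset = m->size - fragsz;
	}

	m->bias--;
	m->offset = offset;
	return page + offset;
}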
