Commit 4949148

Vladimir Davydov authored and Linus Torvalds committed
mm: charge/uncharge kmemcg from generic page allocator paths
Currently, to charge a non-slab allocation to kmemcg, one has to use the alloc_kmem_pages helper with the __GFP_ACCOUNT flag. A page allocated with this helper must eventually be freed with free_kmem_pages, otherwise it won't be uncharged.

This API suits its current users fine, but it turns out to be impossible to use along with page reference counting, i.e. when an allocation is supposed to be freed with put_page, as is the case with pipe or unix socket buffers.

To overcome this limitation, this patch moves charging/uncharging into the generic page allocator paths, i.e. into __alloc_pages_nodemask and free_pages_prepare, and zaps the alloc_kmem_pages/free_kmem_pages helpers. This way, one can use any of the available page allocation functions to get the allocated page charged to kmemcg - it's enough to pass __GFP_ACCOUNT, just as with kmalloc and friends. A charged page is automatically uncharged on free.

To make this possible, we need to mark pages charged to kmemcg somehow. To avoid introducing a new page flag, we make use of page->_mapcount for marking such pages. Since pages charged to kmemcg are not supposed to be mapped to userspace, this should work just fine. There are other (ab)users of page->_mapcount - buddy and balloon pages - but we don't conflict with them.

In case kmemcg is compiled out or not used at runtime, this patch introduces no overhead to the generic page allocator paths. If kmemcg is used, the cost is one extra gfp-flags check on alloc and one extra page->_mapcount check on free, which shouldn't hurt performance, because the data accessed are hot.

Link: http://lkml.kernel.org/r/a9736d856f895bcb465d9f257b54efe32eda6f99.1464079538.git.vdavydov@virtuozzo.com
Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 4526477 commit 4949148
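To make the new contract concrete, here is a minimal, hypothetical caller in the style of the pipe/unix-socket buffers mentioned in the commit message; the flag combination and the surrounding code are illustrative, not taken from this commit:

	/* Hypothetical caller: charge a page to kmemcg via the generic
	 * allocator, then free it through the reference-counting path. */
	struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
	if (!page)
		return -ENOMEM;

	/* ... hand the page out; readers take extra references ... */

	put_page(page);	/* the final put reaches free_pages_prepare(),
			 * which sees PageKmemcg() and uncharges the page */

Before this patch, the put_page path knew nothing about kmemcg, which is exactly why the alloc_kmem_pages/free_kmem_pages pairing could not cover this use case.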

7 files changed (+31, -72 lines)

include/linux/gfp.h

Lines changed: 1 addition & 9 deletions

@@ -78,8 +78,7 @@ struct vm_area_struct;
  * __GFP_THISNODE forces the allocation to be satisified from the requested
  * node with no fallbacks or placement policy enforcements.
  *
- * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant
- * to kmem allocations).
+ * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
  */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
@@ -486,10 +485,6 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
 	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
-extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
-extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
-					  unsigned int order);
-
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
@@ -513,9 +508,6 @@ extern void *__alloc_page_frag(struct page_frag_cache *nc,
 			       unsigned int fragsz, gfp_t gfp_mask);
 extern void __free_page_frag(void *addr);
 
-extern void __free_kmem_pages(struct page *page, unsigned int order);
-extern void free_kmem_pages(unsigned long addr, unsigned int order);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
include/linux/page-flags.h

Lines changed: 7 additions & 0 deletions

@@ -641,6 +641,13 @@ PAGE_MAPCOUNT_OPS(Buddy, BUDDY)
 #define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
 PAGE_MAPCOUNT_OPS(Balloon, BALLOON)
 
+/*
+ * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
+ * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
+ */
+#define PAGE_KMEMCG_MAPCOUNT_VALUE (-512)
+PAGE_MAPCOUNT_OPS(Kmemcg, KMEMCG)
+
 extern bool is_free_buddy_page(struct page *page);
 
 __PAGEFLAG(Isolated, isolated, PF_ANY);
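For orientation, PAGE_MAPCOUNT_OPS (defined earlier in this header) generates a test/set/clear triple keyed on a distinguished page->_mapcount value. A sketch of what the Kmemcg instantiation expands to, assuming the macro follows the same pattern as the Buddy and Balloon users above it:

	/* Sketch of PAGE_MAPCOUNT_OPS(Kmemcg, KMEMCG): _mapcount is -1 on
	 * a page with no mappings, so a distinguished negative value can
	 * mark a special page type without consuming a new page flag. */
	static __always_inline int PageKmemcg(struct page *page)
	{
		return atomic_read(&page->_mapcount) ==
					PAGE_KMEMCG_MAPCOUNT_VALUE;
	}

	static __always_inline void __SetPageKmemcg(struct page *page)
	{
		/* must start from the "no mappings" state */
		VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
		atomic_set(&page->_mapcount, PAGE_KMEMCG_MAPCOUNT_VALUE);
	}

	static __always_inline void __ClearPageKmemcg(struct page *page)
	{
		VM_BUG_ON_PAGE(!PageKmemcg(page), page);
		atomic_set(&page->_mapcount, -1);
	}

This is why the commit message can claim the marking is free of new page flags: the only requirement is that marked pages are never mapped to userspace, since userspace mappings are what _mapcount normally counts.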

kernel/fork.c

Lines changed: 3 additions & 3 deletions

@@ -162,8 +162,8 @@ void __weak arch_release_thread_stack(unsigned long *stack)
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
-						  THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+					     THREAD_SIZE_ORDER);
 
 	if (page)
 		memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
@@ -178,7 +178,7 @@ static inline void free_thread_stack(unsigned long *stack)
 
 	memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
 				    -(1 << THREAD_SIZE_ORDER));
-	__free_kmem_pages(page, THREAD_SIZE_ORDER);
+	__free_pages(page, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_stack_cache;
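Note that switching from alloc_kmem_pages_node() to plain alloc_pages_node() does not drop accounting for kernel stacks, because the charge now happens in the generic allocator path added below in mm/page_alloc.c. This relies on THREADINFO_GFP carrying __GFP_ACCOUNT, which is an assumption about flag definitions of this era rather than something shown in this diff:

	/* Assumed flag definitions (not part of this commit's diff):
	 *
	 *   #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
	 *   #define THREADINFO_GFP     (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK)
	 *
	 * Because __GFP_ACCOUNT is set, alloc_pages_node(node, THREADINFO_GFP,
	 * THREAD_SIZE_ORDER) is charged in __alloc_pages_nodemask(), and
	 * __free_pages() uncharges it via free_pages_prepare(). */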

mm/page_alloc.c

Lines changed: 13 additions & 53 deletions

@@ -63,6 +63,7 @@
 #include <linux/sched/rt.h>
 #include <linux/page_owner.h>
 #include <linux/kthread.h>
+#include <linux/memcontrol.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -1018,6 +1019,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	}
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
+	if (memcg_kmem_enabled() && PageKmemcg(page)) {
+		memcg_kmem_uncharge(page, order);
+		__ClearPageKmemcg(page);
+	}
 	if (check_free)
 		bad += free_pages_check(page);
 	if (bad)
@@ -3841,6 +3846,14 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	}
 
 out:
+	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
+		if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
+			__free_pages(page, order);
+			page = NULL;
+		} else
+			__SetPageKmemcg(page);
+	}
+
 	if (kmemcheck_enabled && page)
 		kmemcheck_pagealloc_alloc(page, order, gfp_mask);
 
@@ -3996,59 +4009,6 @@ void __free_page_frag(void *addr)
 }
 EXPORT_SYMBOL(__free_page_frag);
 
-/*
- * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
- * of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is
- * equivalent to alloc_pages.
- *
- * It should be used when the caller would like to use kmalloc, but since the
- * allocation is large, it has to fall back to the page allocator.
- */
-struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
-{
-	struct page *page;
-
-	page = alloc_pages(gfp_mask, order);
-	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
-	    page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
-		__free_pages(page, order);
-		page = NULL;
-	}
-	return page;
-}
-
-struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
-{
-	struct page *page;
-
-	page = alloc_pages_node(nid, gfp_mask, order);
-	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
-	    page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
-		__free_pages(page, order);
-		page = NULL;
-	}
-	return page;
-}
-
-/*
- * __free_kmem_pages and free_kmem_pages will free pages allocated with
- * alloc_kmem_pages.
- */
-void __free_kmem_pages(struct page *page, unsigned int order)
-{
-	if (memcg_kmem_enabled())
-		memcg_kmem_uncharge(page, order);
-	__free_pages(page, order);
-}
-
-void free_kmem_pages(unsigned long addr, unsigned int order)
-{
-	if (addr != 0) {
-		VM_BUG_ON(!virt_addr_valid((void *)addr));
-		__free_kmem_pages(virt_to_page((void *)addr), order);
-	}
-}
-
 static void *make_alloc_exact(unsigned long addr, unsigned int order,
 			      size_t size)
 {
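The deletions above fold the helper logic into the two generic paths. As a sketch of the resulting equivalence for a hypothetical caller (order and flags illustrative):

	/* Before this commit: dedicated helpers did the charge/uncharge. */
	page = alloc_kmem_pages(GFP_KERNEL | __GFP_ACCOUNT, order);
	/* ... use the page ... */
	__free_kmem_pages(page, order);

	/* After: the plain allocator entry points behave identically,
	 * because __alloc_pages_nodemask() charges on __GFP_ACCOUNT and
	 * free_pages_prepare() uncharges any PageKmemcg() page. */
	page = alloc_pages(GFP_KERNEL | __GFP_ACCOUNT, order);
	/* ... use the page ... */
	__free_pages(page, order);

This also explains the callers converted in the remaining files: each one simply drops the kmem-specific helper in favor of the plain allocator call.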

mm/slab_common.c

Lines changed: 1 addition & 1 deletion

@@ -1012,7 +1012,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 	struct page *page;
 
 	flags |= __GFP_COMP;
-	page = alloc_kmem_pages(flags, order);
+	page = alloc_pages(flags, order);
 	ret = page ? page_address(page) : NULL;
 	kmemleak_alloc(ret, size, 1, flags);
 	kasan_kmalloc_large(ret, size, flags);

mm/slub.c

Lines changed: 3 additions & 3 deletions

@@ -2977,7 +2977,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kfree_hook(object);
-		__free_kmem_pages(page, compound_order(page));
+		__free_pages(page, compound_order(page));
 		p[size] = NULL; /* mark object processed */
 		return size;
 	}
@@ -3693,7 +3693,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 	void *ptr = NULL;
 
 	flags |= __GFP_COMP | __GFP_NOTRACK;
-	page = alloc_kmem_pages_node(node, flags, get_order(size));
+	page = alloc_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
 
@@ -3774,7 +3774,7 @@ void kfree(const void *x)
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kfree_hook(x);
-		__free_kmem_pages(page, compound_order(page));
+		__free_pages(page, compound_order(page));
 		return;
 	}
 	slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);

mm/vmalloc.c

Lines changed: 3 additions & 3 deletions

@@ -1501,7 +1501,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 			struct page *page = area->pages[i];
 
 			BUG_ON(!page);
-			__free_kmem_pages(page, 0);
+			__free_pages(page, 0);
 		}
 
 		kvfree(area->pages);
@@ -1629,9 +1629,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			struct page *page;
 
 			if (node == NUMA_NO_NODE)
-				page = alloc_kmem_pages(alloc_mask, order);
+				page = alloc_pages(alloc_mask, order);
 			else
-				page = alloc_kmem_pages_node(node, alloc_mask, order);
+				page = alloc_pages_node(node, alloc_mask, order);
 
 			if (unlikely(!page)) {
 				/* Successfully allocated i pages, free them in __vunmap() */
