
Commit 2cf8558

htejun authored and axboe committed
memcontrol: schedule throttling if we are congested
Memory allocations can induce swapping via kswapd or direct reclaim. If we are having IO done for us by kswapd and don't actually go into direct reclaim, we may never get scheduled for throttling. So instead check to see if our cgroup is congested, and if so schedule the throttling. Before we return to user space, the throttling machinery will only throttle if we actually required it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent d09d8df commit 2cf8558
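
In outline, the patch threads block-cgroup congestion feedback into the memcg charge step. A condensed sketch of the resulting call chain, assembled from the diffs below (illustrative, not a literal excerpt):

    /*
     * fault path (e.g. do_anonymous_page, do_swap_page, shmem_getpage_gfp)
     *   -> mem_cgroup_try_charge_delay()       new wrapper, mm/memcontrol.c
     *        -> mem_cgroup_try_charge()        charge exactly as before
     *        -> mem_cgroup_throttle_swaprate() new, mm/swapfile.c
     *             -> blk_cgroup_congested()    cheap bail-out if not congested
     *             -> blkcg_schedule_throttle() arm current->throttle_queue
     *
     * The armed throttle is applied lazily: the task only sleeps on its way
     * back to user space, and only if throttling is still actually required.
     */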

File tree

7 files changed: 81 additions & 14 deletions


include/linux/memcontrol.h

Lines changed: 13 additions & 0 deletions
@@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
 			  bool compound);
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound);
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 			      bool lrucare, bool compound);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,

@@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mem_cgroup_try_charge_delay(struct page *page,
+					      struct mm_struct *mm,
+					      gfp_t gfp_mask,
+					      struct mem_cgroup **memcgp,
+					      bool compound)
+{
+	*memcgp = NULL;
+	return 0;
+}
+
 static inline void mem_cgroup_commit_charge(struct page *page,
 					    struct mem_cgroup *memcg,
 					    bool lrucare, bool compound)
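
For context, the new helper is a drop-in replacement for mem_cgroup_try_charge() and pairs with the same commit/cancel calls. A minimal hypothetical caller, mirroring the fault-path conversions in the mm/ diffs below (example_anon_charge is illustrative and not part of the patch):

    /* Illustrative sketch only; mirrors the call sites patched below. */
    static int example_anon_charge(struct page *page, struct mm_struct *mm)
    {
    	struct mem_cgroup *memcg;

    	/* may arm a congestion throttle in addition to charging */
    	if (mem_cgroup_try_charge_delay(page, mm, GFP_KERNEL, &memcg, false))
    		return -ENOMEM;

    	/* ... install the page into the page tables ... */
    	mem_cgroup_commit_charge(page, memcg, false, false);
    	return 0;
    }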

include/linux/swap.h

Lines changed: 10 additions & 1 deletion
@@ -629,14 +629,23 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 
 	return memcg->swappiness;
 }
-
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 {
 	return vm_swappiness;
 }
 #endif
 
+#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+					 gfp_t gfp_mask);
+#else
+static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg,
+						int node, gfp_t gfp_mask)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
 extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
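
Because the !CONFIG case compiles to an empty inline stub, generic mm code can call mem_cgroup_throttle_swaprate() unconditionally; the wrapper added in mm/memcontrol.c below needs no #ifdef of its own.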

mm/huge_memory.c

Lines changed: 3 additions & 3 deletions
@@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;

@@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
 		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
 					       vmf->address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
-			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
+			     mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
 				     GFP_KERNEL, &memcg, false))) {
 			if (pages[i])
 				put_page(pages[i]);

@@ -1312,7 +1312,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+	if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
 					huge_gfp, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);

mm/memcontrol.c

Lines changed: 13 additions & 0 deletions
@@ -5593,6 +5593,19 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	return ret;
 }
 
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);
+	memcg = *memcgp;
+	mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask);
+	return ret;
+}
+
 /**
  * mem_cgroup_commit_charge - commit a page charge
  * @page: page to charge

mm/memory.c

Lines changed: 6 additions & 5 deletions
@@ -2503,7 +2503,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 		cow_user_page(new_page, old_page, vmf->address, vma);
 	}
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_new;
 
 	__SetPageUptodate(new_page);

@@ -3003,8 +3003,8 @@ int do_swap_page(struct vm_fault *vmf)
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
-				&memcg, false)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+					&memcg, false)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}

@@ -3165,7 +3165,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+					false))
 		goto oom_free_page;
 
 	/*

@@ -3661,7 +3662,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 	if (!vmf->cow_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+	if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
 				&vmf->memcg, false)) {
 		put_page(vmf->cow_page);
 		return VM_FAULT_OOM;

mm/shmem.c

Lines changed: 5 additions & 5 deletions
@@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 	 * Charged back to the user (not to caller) when swap account is used.
 	 */
-	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
-			false);
+	error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
+			&memcg, false);
 	if (error)
 		goto out;
 	/* No radix_tree_preload: swap entry keeps a place for page in tree */

@@ -1712,7 +1712,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 			goto failed;
 		}
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 				false);
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,

@@ -1818,7 +1818,7 @@ alloc_nohuge:	page = shmem_alloc_and_acct_page(gfp, inode,
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 				PageTransHuge(page));
 		if (error)
 			goto unacct;

@@ -2291,7 +2291,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	__SetPageSwapBacked(page);
 	__SetPageUptodate(page);
 
-	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+	ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
 	if (ret)
 		goto out_release;
 

mm/swapfile.c

Lines changed: 31 additions & 0 deletions
@@ -3731,6 +3731,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
 	}
 }
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+				  gfp_t gfp_mask)
+{
+	struct swap_info_struct *si, *next;
+	if (!(gfp_mask & __GFP_IO) || !memcg)
+		return;
+
+	if (!blk_cgroup_congested())
+		return;
+
+	/*
+	 * We've already scheduled a throttle, avoid taking the global swap
+	 * lock.
+	 */
+	if (current->throttle_queue)
+		return;
+
+	spin_lock(&swap_avail_lock);
+	plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
+				  avail_lists[node]) {
+		if (si->bdev) {
+			blkcg_schedule_throttle(bdev_get_queue(si->bdev),
+						true);
+			break;
+		}
+	}
+	spin_unlock(&swap_avail_lock);
+}
+#endif
+
 static int __init swapfile_init(void)
 {
 	int nid;
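
Note the three cheap bail-outs ahead of the slow path: allocations that may not do IO (!(gfp_mask & __GFP_IO)), pages not charged to any cgroup (!memcg), and tasks that already have a throttle armed (current->throttle_queue), which avoids retaking the global swap_avail_lock on every charge. When a throttle is scheduled, only the first swap device on the node's avail list that is backed by a block device is used.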
