Skip to content

Commit b0b9b3d

Browse files
Hugh Dickins authored and torvalds committed
mm: stop leaking PageTables
4.10-rc loadtest (even on x86, and even without THPCache) fails with "fork: Cannot allocate memory" or some such; and /proc/meminfo shows PageTables growing. Commit 953c66c ("mm: THP page cache support for ppc64") that got merged in rc1 removed the freeing of an unused preallocated pagetable after do_fault_around() has called map_pages(). This is usually a good optimization, so that the followup doesn't have to reallocate one; but it's not sufficient to shift the freeing into alloc_set_pte(), since there are failure cases (most commonly VM_FAULT_RETRY) which never reach finish_fault(). Check and free it at the outer level in do_fault(), then we don't need to worry in alloc_set_pte(), and can restore that to how it was (I cannot find any reason to pte_free() under lock as it was doing). And fix a separate pagetable leak, or crash, introduced by the same change, that could only show up on some ppc64: why does do_set_pmd()'s failure case attempt to withdraw a pagetable when it never deposited one, at the same time overwriting (so leaking) the vmf->prealloc_pte? Residue of an earlier implementation, perhaps? Delete it. Fixes: 953c66c ("mm: THP page cache support for ppc64") Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Michael Neuling <mikey@neuling.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 87bc610 commit b0b9b3d

File tree

1 file changed

+20
-27
lines changed

1 file changed

+20
-27
lines changed

mm/memory.c

Lines changed: 20 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
30083008
ret = 0;
30093009
count_vm_event(THP_FILE_MAPPED);
30103010
out:
3011-
/*
3012-
* If we are going to fallback to pte mapping, do a
3013-
* withdraw with pmd lock held.
3014-
*/
3015-
if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
3016-
vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
3017-
vmf->pmd);
30183011
spin_unlock(vmf->ptl);
30193012
return ret;
30203013
}
@@ -3055,20 +3048,18 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
30553048

30563049
ret = do_set_pmd(vmf, page);
30573050
if (ret != VM_FAULT_FALLBACK)
3058-
goto fault_handled;
3051+
return ret;
30593052
}
30603053

30613054
if (!vmf->pte) {
30623055
ret = pte_alloc_one_map(vmf);
30633056
if (ret)
3064-
goto fault_handled;
3057+
return ret;
30653058
}
30663059

30673060
/* Re-check under ptl */
3068-
if (unlikely(!pte_none(*vmf->pte))) {
3069-
ret = VM_FAULT_NOPAGE;
3070-
goto fault_handled;
3071-
}
3061+
if (unlikely(!pte_none(*vmf->pte)))
3062+
return VM_FAULT_NOPAGE;
30723063

30733064
flush_icache_page(vma, page);
30743065
entry = mk_pte(page, vma->vm_page_prot);
@@ -3088,15 +3079,8 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
30883079

30893080
/* no need to invalidate: a not-present page won't be cached */
30903081
update_mmu_cache(vma, vmf->address, vmf->pte);
3091-
ret = 0;
30923082

3093-
fault_handled:
3094-
/* preallocated pagetable is unused: free it */
3095-
if (vmf->prealloc_pte) {
3096-
pte_free(vmf->vma->vm_mm, vmf->prealloc_pte);
3097-
vmf->prealloc_pte = 0;
3098-
}
3099-
return ret;
3083+
return 0;
31003084
}
31013085

31023086

@@ -3360,15 +3344,24 @@ static int do_shared_fault(struct vm_fault *vmf)
33603344
static int do_fault(struct vm_fault *vmf)
33613345
{
33623346
struct vm_area_struct *vma = vmf->vma;
3347+
int ret;
33633348

33643349
/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
33653350
if (!vma->vm_ops->fault)
3366-
return VM_FAULT_SIGBUS;
3367-
if (!(vmf->flags & FAULT_FLAG_WRITE))
3368-
return do_read_fault(vmf);
3369-
if (!(vma->vm_flags & VM_SHARED))
3370-
return do_cow_fault(vmf);
3371-
return do_shared_fault(vmf);
3351+
ret = VM_FAULT_SIGBUS;
3352+
else if (!(vmf->flags & FAULT_FLAG_WRITE))
3353+
ret = do_read_fault(vmf);
3354+
else if (!(vma->vm_flags & VM_SHARED))
3355+
ret = do_cow_fault(vmf);
3356+
else
3357+
ret = do_shared_fault(vmf);
3358+
3359+
/* preallocated pagetable is unused: free it */
3360+
if (vmf->prealloc_pte) {
3361+
pte_free(vma->vm_mm, vmf->prealloc_pte);
3362+
vmf->prealloc_pte = 0;
3363+
}
3364+
return ret;
33723365
}
33733366

33743367
static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,

0 commit comments

Comments
 (0)