Skip to content

Commit 7a3ef20

Browse files
koct9itorvalds
authored andcommitted
mm: prevent endless growth of anon_vma hierarchy
Constantly forking task causes unlimited grow of anon_vma chain. Each next child allocates new level of anon_vmas and links vma to all previous levels because pages might be inherited from any level. This patch adds heuristic which decides to reuse existing anon_vma instead of forking new one. It adds counter anon_vma->degree which counts linked vmas and directly descending anon_vmas and reuses anon_vma if counter is lower than two. As a result each anon_vma has either vma or at least two descending anon_vmas. In such trees half of nodes are leafs with alive vmas, thus count of anon_vmas is no more than two times bigger than count of vmas. This heuristic reuses anon_vmas as few as possible because each reuse adds false aliasing among vmas and rmap walker ought to scan more ptes when it searches where page is might be mapped. Link: http://lkml.kernel.org/r/20120816024610.GA5350@evergreen.ssec.wisc.edu Fixes: 5beb493 ("mm: change anon_vma linking to fix multi-process server scalability issue") [akpm@linux-foundation.org: fix typo, per Rik] Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com> Reported-by: Daniel Forrest <dan.forrest@ssec.wisc.edu> Tested-by: Michal Hocko <mhocko@suse.cz> Tested-by: Jerome Marchand <jmarchan@redhat.com> Reviewed-by: Michal Hocko <mhocko@suse.cz> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: <stable@vger.kernel.org> [2.6.34+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 3245d6a commit 7a3ef20

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

include/linux/rmap.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ struct anon_vma {
3636
*/
3737
atomic_t refcount;
3838

39+
/*
40+
* Count of child anon_vmas and VMAs which points to this anon_vma.
41+
*
42+
* This counter is used for making decision about reusing anon_vma
43+
* instead of forking new one. See comments in function anon_vma_clone.
44+
*/
45+
unsigned degree;
46+
47+
struct anon_vma *parent; /* Parent of this anon_vma */
48+
3949
/*
4050
* NOTE: the LSB of the rb_root.rb_node is set by
4151
* mm_take_all_locks() _after_ taking the above lock. So the

mm/rmap.c

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
7272
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
7373
if (anon_vma) {
7474
atomic_set(&anon_vma->refcount, 1);
75+
anon_vma->degree = 1; /* Reference for first vma */
76+
anon_vma->parent = anon_vma;
7577
/*
7678
* Initialise the anon_vma root to point to itself. If called
7779
* from fork, the root will be reset to the parents anon_vma.
@@ -188,6 +190,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
188190
if (likely(!vma->anon_vma)) {
189191
vma->anon_vma = anon_vma;
190192
anon_vma_chain_link(vma, avc, anon_vma);
193+
/* vma reference or self-parent link for new root */
194+
anon_vma->degree++;
191195
allocated = NULL;
192196
avc = NULL;
193197
}
@@ -236,6 +240,14 @@ static inline void unlock_anon_vma_root(struct anon_vma *root)
236240
/*
237241
* Attach the anon_vmas from src to dst.
238242
* Returns 0 on success, -ENOMEM on failure.
243+
*
244+
* If dst->anon_vma is NULL this function tries to find and reuse existing
245+
* anon_vma which has no vmas and only one child anon_vma. This prevents
246+
* degradation of anon_vma hierarchy to endless linear chain in case of
247+
* constantly forking task. On the other hand, an anon_vma with more than one
248+
* child isn't reused even if there was no alive vma, thus rmap walker has a
249+
* good chance of avoiding scanning the whole hierarchy when it searches where
250+
* page is mapped.
239251
*/
240252
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
241253
{
@@ -256,7 +268,21 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
256268
anon_vma = pavc->anon_vma;
257269
root = lock_anon_vma_root(root, anon_vma);
258270
anon_vma_chain_link(dst, avc, anon_vma);
271+
272+
/*
273+
* Reuse existing anon_vma if its degree lower than two,
274+
* that means it has no vma and only one anon_vma child.
275+
*
276+
* Do not chose parent anon_vma, otherwise first child
277+
* will always reuse it. Root anon_vma is never reused:
278+
* it has self-parent reference and at least one child.
279+
*/
280+
if (!dst->anon_vma && anon_vma != src->anon_vma &&
281+
anon_vma->degree < 2)
282+
dst->anon_vma = anon_vma;
259283
}
284+
if (dst->anon_vma)
285+
dst->anon_vma->degree++;
260286
unlock_anon_vma_root(root);
261287
return 0;
262288

@@ -280,6 +306,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
280306
if (!pvma->anon_vma)
281307
return 0;
282308

309+
/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
310+
vma->anon_vma = NULL;
311+
283312
/*
284313
* First, attach the new VMA to the parent VMA's anon_vmas,
285314
* so rmap can find non-COWed pages in child processes.
@@ -288,6 +317,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
288317
if (error)
289318
return error;
290319

320+
/* An existing anon_vma has been reused, all done then. */
321+
if (vma->anon_vma)
322+
return 0;
323+
291324
/* Then add our own anon_vma. */
292325
anon_vma = anon_vma_alloc();
293326
if (!anon_vma)
@@ -301,6 +334,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
301334
* lock any of the anon_vmas in this anon_vma tree.
302335
*/
303336
anon_vma->root = pvma->anon_vma->root;
337+
anon_vma->parent = pvma->anon_vma;
304338
/*
305339
* With refcounts, an anon_vma can stay around longer than the
306340
* process it belongs to. The root anon_vma needs to be pinned until
@@ -311,6 +345,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
311345
vma->anon_vma = anon_vma;
312346
anon_vma_lock_write(anon_vma);
313347
anon_vma_chain_link(vma, avc, anon_vma);
348+
anon_vma->parent->degree++;
314349
anon_vma_unlock_write(anon_vma);
315350

316351
return 0;
@@ -341,12 +376,16 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
341376
* Leave empty anon_vmas on the list - we'll need
342377
* to free them outside the lock.
343378
*/
344-
if (RB_EMPTY_ROOT(&anon_vma->rb_root))
379+
if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
380+
anon_vma->parent->degree--;
345381
continue;
382+
}
346383

347384
list_del(&avc->same_vma);
348385
anon_vma_chain_free(avc);
349386
}
387+
if (vma->anon_vma)
388+
vma->anon_vma->degree--;
350389
unlock_anon_vma_root(root);
351390

352391
/*
@@ -357,6 +396,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
357396
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
358397
struct anon_vma *anon_vma = avc->anon_vma;
359398

399+
BUG_ON(anon_vma->degree);
360400
put_anon_vma(anon_vma);
361401

362402
list_del(&avc->same_vma);

0 commit comments

Comments
 (0)