Skip to content

Commit af5b0f6

Browse files
kiryl authored and torvalds committed
mm: consolidate page table accounting
Currently, we account page tables separately for each page table level, but that's redundant -- we only make use of the total memory allocated to page tables for the oom_badness calculation. We also provide the information to userspace, but it has dubious value there too.

This patch switches page table accounting to a single counter.

mm->pgtables_bytes is now used to account for all page table levels. We use bytes because the page table size may differ between levels of the page table tree.

The change has a user-visible effect: VmPMD and VmPUD are no longer reported in /proc/[pid]/status. Not sure if anybody uses them. (As an alternative, we can always report 0 kB for them.)

The OOM-killer report is also slightly changed: we now report pgtables_bytes instead of nr_ptes, nr_pmds, nr_puds.

Apart from reducing the number of counters per mm, the benefit is that we now calculate oom_badness() more correctly for machines which have different page table sizes depending on the level, or where page tables are less than a page in size.

The only downside can be debuggability, because we no longer know which page table level could leak. But I do not remember many bugs that would be caught by separate counters, so I wouldn't lose sleep over this.

[akpm@linux-foundation.org: fix mm/huge_memory.c]
Link: http://lkml.kernel.org/r/20171006100651.44742-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
[kirill.shutemov@linux.intel.com: fix build]
Link: http://lkml.kernel.org/r/20171016150113.ikfxy3e7zzfvsr4w@black.fi.intel.com
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent c481290 commit af5b0f6

File tree

9 files changed

+32
-93
lines changed

9 files changed

+32
-93
lines changed

Documentation/filesystems/proc.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,6 @@ Table 1-2: Contents of the status files (as of 4.8)
250250
VmExe size of text segment
251251
VmLib size of shared library code
252252
VmPTE size of page table entries
253-
VmPMD size of second level page tables
254253
VmSwap amount of swap used by anonymous private data
255254
(shmem swap usage is not included)
256255
HugetlbPages size of hugetlb memory portions

Documentation/sysctl/vm.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -629,10 +629,10 @@ oom_dump_tasks
629629

630630
Enables a system-wide task dump (excluding kernel threads) to be produced
631631
when the kernel performs an OOM-killing and includes such information as
632-
pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, nr_puds, swapents,
633-
oom_score_adj score, and name. This is helpful to determine why the OOM
634-
killer was invoked, to identify the rogue task that caused it, and to
635-
determine why the OOM killer chose the task it did to kill.
632+
pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
633+
score, and name. This is helpful to determine why the OOM killer was
634+
invoked, to identify the rogue task that caused it, and to determine why
635+
the OOM killer chose the task it did to kill.
636636

637637
If this is set to zero, this information is suppressed. On very
638638
large systems with thousands of tasks it may not be feasible to dump

fs/proc/task_mmu.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
void task_mem(struct seq_file *m, struct mm_struct *mm)
2828
{
29-
unsigned long text, lib, swap, ptes, pmds, puds, anon, file, shmem;
29+
unsigned long text, lib, swap, anon, file, shmem;
3030
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
3131

3232
anon = get_mm_counter(mm, MM_ANONPAGES);
@@ -50,9 +50,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
5050
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
5151
lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
5252
swap = get_mm_counter(mm, MM_SWAPENTS);
53-
ptes = PTRS_PER_PTE * sizeof(pte_t) * mm_nr_ptes(mm);
54-
pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
55-
puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
5653
seq_printf(m,
5754
"VmPeak:\t%8lu kB\n"
5855
"VmSize:\t%8lu kB\n"
@@ -68,8 +65,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
6865
"VmExe:\t%8lu kB\n"
6966
"VmLib:\t%8lu kB\n"
7067
"VmPTE:\t%8lu kB\n"
71-
"VmPMD:\t%8lu kB\n"
72-
"VmPUD:\t%8lu kB\n"
7368
"VmSwap:\t%8lu kB\n",
7469
hiwater_vm << (PAGE_SHIFT-10),
7570
total_vm << (PAGE_SHIFT-10),
@@ -82,9 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
8277
shmem << (PAGE_SHIFT-10),
8378
mm->data_vm << (PAGE_SHIFT-10),
8479
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
85-
ptes >> 10,
86-
pmds >> 10,
87-
puds >> 10,
80+
mm_pgtables_bytes(mm) >> 10,
8881
swap << (PAGE_SHIFT-10));
8982
hugetlb_report_usage(m, mm);
9083
}

include/linux/mm.h

Lines changed: 12 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,37 +1605,20 @@ static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
16051605
{
16061606
return 0;
16071607
}
1608-
1609-
static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
1610-
{
1611-
return 0;
1612-
}
1613-
1614-
static inline void mm_nr_puds_init(struct mm_struct *mm) {}
16151608
static inline void mm_inc_nr_puds(struct mm_struct *mm) {}
16161609
static inline void mm_dec_nr_puds(struct mm_struct *mm) {}
16171610

16181611
#else
16191612
int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
16201613

1621-
static inline void mm_nr_puds_init(struct mm_struct *mm)
1622-
{
1623-
atomic_long_set(&mm->nr_puds, 0);
1624-
}
1625-
1626-
static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
1627-
{
1628-
return atomic_long_read(&mm->nr_puds);
1629-
}
1630-
16311614
static inline void mm_inc_nr_puds(struct mm_struct *mm)
16321615
{
1633-
atomic_long_inc(&mm->nr_puds);
1616+
atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
16341617
}
16351618

16361619
static inline void mm_dec_nr_puds(struct mm_struct *mm)
16371620
{
1638-
atomic_long_dec(&mm->nr_puds);
1621+
atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
16391622
}
16401623
#endif
16411624

@@ -1646,64 +1629,47 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
16461629
return 0;
16471630
}
16481631

1649-
static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
1650-
1651-
static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
1652-
{
1653-
return 0;
1654-
}
1655-
16561632
static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
16571633
static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
16581634

16591635
#else
16601636
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
16611637

1662-
static inline void mm_nr_pmds_init(struct mm_struct *mm)
1663-
{
1664-
atomic_long_set(&mm->nr_pmds, 0);
1665-
}
1666-
1667-
static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
1668-
{
1669-
return atomic_long_read(&mm->nr_pmds);
1670-
}
1671-
16721638
static inline void mm_inc_nr_pmds(struct mm_struct *mm)
16731639
{
1674-
atomic_long_inc(&mm->nr_pmds);
1640+
atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
16751641
}
16761642

16771643
static inline void mm_dec_nr_pmds(struct mm_struct *mm)
16781644
{
1679-
atomic_long_dec(&mm->nr_pmds);
1645+
atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
16801646
}
16811647
#endif
16821648

16831649
#ifdef CONFIG_MMU
1684-
static inline void mm_nr_ptes_init(struct mm_struct *mm)
1650+
static inline void mm_pgtables_bytes_init(struct mm_struct *mm)
16851651
{
1686-
atomic_long_set(&mm->nr_ptes, 0);
1652+
atomic_long_set(&mm->pgtables_bytes, 0);
16871653
}
16881654

1689-
static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
1655+
static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
16901656
{
1691-
return atomic_long_read(&mm->nr_ptes);
1657+
return atomic_long_read(&mm->pgtables_bytes);
16921658
}
16931659

16941660
static inline void mm_inc_nr_ptes(struct mm_struct *mm)
16951661
{
1696-
atomic_long_inc(&mm->nr_ptes);
1662+
atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
16971663
}
16981664

16991665
static inline void mm_dec_nr_ptes(struct mm_struct *mm)
17001666
{
1701-
atomic_long_dec(&mm->nr_ptes);
1667+
atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
17021668
}
17031669
#else
1704-
static inline void mm_nr_ptes_init(struct mm_struct *mm) {}
17051670

1706-
static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
1671+
static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {}
1672+
static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
17071673
{
17081674
return 0;
17091675
}

include/linux/mm_types.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -402,13 +402,7 @@ struct mm_struct {
402402
atomic_t mm_count;
403403

404404
#ifdef CONFIG_MMU
405-
atomic_long_t nr_ptes; /* PTE page table pages */
406-
#endif
407-
#if CONFIG_PGTABLE_LEVELS > 2
408-
atomic_long_t nr_pmds; /* PMD page table pages */
409-
#endif
410-
#if CONFIG_PGTABLE_LEVELS > 3
411-
atomic_long_t nr_puds; /* PUD page table pages */
405+
atomic_long_t pgtables_bytes; /* size in bytes of all page tables (all levels) */
412406
#endif
413407
int map_count; /* number of VMAs */
414408

kernel/fork.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -817,9 +817,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
817817
init_rwsem(&mm->mmap_sem);
818818
INIT_LIST_HEAD(&mm->mmlist);
819819
mm->core_state = NULL;
820-
mm_nr_ptes_init(mm);
821-
mm_nr_pmds_init(mm);
822-
mm_nr_puds_init(mm);
820+
mm_pgtables_bytes_init(mm);
823821
mm->map_count = 0;
824822
mm->locked_vm = 0;
825823
mm->pinned_vm = 0;
@@ -873,15 +871,9 @@ static void check_mm(struct mm_struct *mm)
873871
"mm:%p idx:%d val:%ld\n", mm, i, x);
874872
}
875873

876-
if (mm_nr_ptes(mm))
877-
pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
878-
mm_nr_ptes(mm));
879-
if (mm_nr_pmds(mm))
880-
pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
881-
mm_nr_pmds(mm));
882-
if (mm_nr_puds(mm))
883-
pr_alert("BUG: non-zero nr_puds on freeing mm: %ld\n",
884-
mm_nr_puds(mm));
874+
if (mm_pgtables_bytes(mm))
875+
pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
876+
mm_pgtables_bytes(mm));
885877

886878
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
887879
VM_BUG_ON_MM(mm->pmd_huge_pte, mm);

mm/debug.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,7 @@ void dump_mm(const struct mm_struct *mm)
105105
"get_unmapped_area %p\n"
106106
#endif
107107
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
108-
"pgd %p mm_users %d mm_count %d\n"
109-
"nr_ptes %lu nr_pmds %lu nr_puds %lu map_count %d\n"
108+
"pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
110109
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
111110
"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
112111
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
@@ -136,9 +135,7 @@ void dump_mm(const struct mm_struct *mm)
136135
mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
137136
mm->pgd, atomic_read(&mm->mm_users),
138137
atomic_read(&mm->mm_count),
139-
mm_nr_ptes(mm),
140-
mm_nr_pmds(mm),
141-
mm_nr_puds(mm),
138+
mm_pgtables_bytes(mm),
142139
mm->map_count,
143140
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
144141
mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,

mm/huge_memory.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
942942
set_pmd_at(src_mm, addr, src_pmd, pmd);
943943
}
944944
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
945-
atomic_long_inc(&dst_mm->nr_ptes);
945+
mm_inc_nr_ptes(dst_mm);
946946
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
947947
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
948948
ret = 0;

mm/oom_kill.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
221221
* task's rss, pagetable and swap space use.
222222
*/
223223
points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
224-
mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm);
224+
mm_pgtables_bytes(p->mm) / PAGE_SIZE;
225225
task_unlock(p);
226226

227227
/*
@@ -389,15 +389,15 @@ static void select_bad_process(struct oom_control *oc)
389389
* Dumps the current memory state of all eligible tasks. Tasks not in the same
390390
* memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
391391
* are not shown.
392-
* State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
393-
* swapents, oom_score_adj value, and name.
392+
* State information includes task's pid, uid, tgid, vm size, rss,
393+
* pgtables_bytes, swapents, oom_score_adj value, and name.
394394
*/
395395
static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
396396
{
397397
struct task_struct *p;
398398
struct task_struct *task;
399399

400-
pr_info("[ pid ] uid tgid total_vm rss nr_ptes nr_pmds nr_puds swapents oom_score_adj name\n");
400+
pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
401401
rcu_read_lock();
402402
for_each_process(p) {
403403
if (oom_unkillable_task(p, memcg, nodemask))
@@ -413,12 +413,10 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
413413
continue;
414414
}
415415

416-
pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu %5hd %s\n",
416+
pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
417417
task->pid, from_kuid(&init_user_ns, task_uid(task)),
418418
task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
419-
mm_nr_ptes(task->mm),
420-
mm_nr_pmds(task->mm),
421-
mm_nr_puds(task->mm),
419+
mm_pgtables_bytes(task->mm),
422420
get_mm_counter(task->mm, MM_SWAPENTS),
423421
task->signal->oom_score_adj, task->comm);
424422
task_unlock(task);

0 commit comments

Comments
 (0)