Skip to content

Commit a00cc7d

Browse files
Matthew Wilcox authored and torvalds committed
mm, x86: add support for PUD-sized transparent hugepages
The current transparent hugepage code only supports PMDs. This patch adds support for transparent use of PUDs with DAX. It does not include support for anonymous pages. x86 support code also added. Most of this patch simply parallels the work that was done for huge PMDs. The only major difference is how the new ->pud_entry method in mm_walk works. The ->pmd_entry method replaces the ->pte_entry method, whereas the ->pud_entry method works along with either ->pmd_entry or ->pte_entry. The pagewalk code takes care of locking the PUD before calling ->pud_entry, so handlers do not need to worry whether the PUD is stable. [dave.jiang@intel.com: fix SMP x86 32bit build for native_pud_clear()] Link: http://lkml.kernel.org/r/148719066814.31111.3239231168815337012.stgit@djiang5-desk3.ch.intel.com [dave.jiang@intel.com: native_pud_clear missing on i386 build] Link: http://lkml.kernel.org/r/148640375195.69754.3315433724330910314.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/148545059381.17912.8602162635537598445.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com> Signed-off-by: Dave Jiang <dave.jiang@intel.com> Tested-by: Alexander Kapshuk <alexander.kapshuk@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Jan Kara <jack@suse.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent a2d5816 commit a00cc7d

File tree

21 files changed

+844
-18
lines changed

21 files changed

+844
-18
lines changed

arch/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
571571
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
572572
bool
573573

574+
# Promptless bool: architectures select this to advertise PUD-sized transparent hugepage support.
config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
575+
bool
576+
574577
config HAVE_ARCH_HUGE_VMAP
575578
bool
576579

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ config X86
109109
select HAVE_ARCH_SECCOMP_FILTER
110110
select HAVE_ARCH_TRACEHOOK
111111
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
112+
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
112113
select HAVE_ARCH_VMAP_STACK if X86_64
113114
select HAVE_ARCH_WITHIN_STACK_FRAMES
114115
select HAVE_CC_STACKPROTECTOR

arch/x86/include/asm/paravirt.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,17 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
475475
native_pmd_val(pmd));
476476
}
477477

478+
/*
 * Install @pud at @pudp through the pv_mmu_ops.set_pud_at hook.
 * If pudval_t is wider than long the hook is called directly;
 * otherwise the call goes through PVOP_VCALL4 and passes the raw
 * native_pud_val() of the entry as the fourth argument.
 */
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
479+
pud_t *pudp, pud_t pud)
480+
{
481+
if (sizeof(pudval_t) > sizeof(long))
482+
/* 5 arg words */
483+
pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
484+
else
485+
PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
486+
native_pud_val(pud));
487+
}
488+
478489
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
479490
{
480491
pmdval_t val = native_pmd_val(pmd);

arch/x86/include/asm/paravirt_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,8 @@ struct pv_mmu_ops {
249249
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
250250
void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
251251
pmd_t *pmdp, pmd_t pmdval);
252+
void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
253+
pud_t *pudp, pud_t pudval);
252254
void (*pte_update)(struct mm_struct *mm, unsigned long addr,
253255
pte_t *ptep);
254256

arch/x86/include/asm/pgtable-2level.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
2121
*pmdp = pmd;
2222
}
2323

24+
/*
 * Empty stub: neither @pudp nor @pud is used.
 * NOTE(review): presumably a no-op because there is no separate PUD level
 * with 2-level paging — confirm.
 */
static inline void native_set_pud(pud_t *pudp, pud_t pud)
25+
{
26+
}
27+
2428
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
2529
{
2630
native_set_pte(ptep, pte);
@@ -31,6 +35,10 @@ static inline void native_pmd_clear(pmd_t *pmdp)
3135
native_set_pmd(pmdp, __pmd(0));
3236
}
3337

38+
/*
 * Empty stub: @pudp is not touched.
 * NOTE(review): presumably a no-op because there is no separate PUD level
 * with 2-level paging — confirm.
 */
static inline void native_pud_clear(pud_t *pudp)
39+
{
40+
}
41+
3442
static inline void native_pte_clear(struct mm_struct *mm,
3543
unsigned long addr, pte_t *xp)
3644
{
@@ -55,6 +63,15 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
5563
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
5664
#endif
5765

66+
/*
 * native_pudp_get_and_clear(): fetch the entry at @xp and zero it.
 * With CONFIG_SMP this is one xchg() on the entry viewed as a pudval_t;
 * without it the name is a macro for native_local_pudp_get_and_clear().
 */
#ifdef CONFIG_SMP
67+
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
68+
{
69+
return __pud(xchg((pudval_t *)xp, 0));
70+
}
71+
#else
72+
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
73+
#endif
74+
5875
/* Bit manipulation helper on pte/pgoff entry */
5976
static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
6077
unsigned long mask, unsigned int leftshift)

arch/x86/include/asm/pgtable-3level.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ static inline void native_pmd_clear(pmd_t *pmd)
121121
*(tmp + 1) = 0;
122122
}
123123

124+
/*
 * Empty native_pud_clear() stub, compiled only when CONFIG_SMP is off.
 * NOTE(review): presumably present so that native_local_pudp_get_and_clear(),
 * which the !CONFIG_SMP native_pudp_get_and_clear() macro expands to, has a
 * native_pud_clear() to call — confirm.
 */
#ifndef CONFIG_SMP
125+
static inline void native_pud_clear(pud_t *pudp)
126+
{
127+
}
128+
#endif
129+
124130
static inline void pud_clear(pud_t *pudp)
125131
{
126132
set_pud(pudp, __pud(0));
@@ -176,6 +182,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
176182
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
177183
#endif
178184

185+
/*
 * native_pudp_get_and_clear() for the 3-level layout.
 *
 * With CONFIG_SMP, union split_pud overlays a pud_t with two u32 halves.
 * The low half is swapped with 0 by xchg(); the high half is then read and
 * zeroed with plain accesses, so the two halves are not cleared in a single
 * atomic operation. The previous value is reassembled and returned.
 *
 * Without CONFIG_SMP the name is a macro for
 * native_local_pudp_get_and_clear().
 */
#ifdef CONFIG_SMP
186+
union split_pud {
187+
struct {
188+
u32 pud_low;
189+
u32 pud_high;
190+
};
191+
pud_t pud;
192+
};
193+
194+
static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
195+
{
196+
union split_pud res, *orig = (union split_pud *)pudp;
197+
198+
/* xchg acts as a barrier before setting of the high bits */
199+
res.pud_low = xchg(&orig->pud_low, 0);
200+
res.pud_high = orig->pud_high;
201+
orig->pud_high = 0;
202+
203+
return res.pud;
204+
}
205+
#else
206+
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
207+
#endif
208+
179209
/* Encode and de-code a swap entry */
180210
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
181211
#define __swp_type(x) (((x).val) & 0x1f)

arch/x86/include/asm/pgtable.h

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
4646
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
4747
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
4848
#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
49+
/* set_pud_at() maps straight to native_set_pud_at() with the same arguments. */
#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)
4950

5051
#define set_pte_atomic(ptep, pte) \
5152
native_set_pte_atomic(ptep, pte)
@@ -128,6 +129,16 @@ static inline int pmd_young(pmd_t pmd)
128129
return pmd_flags(pmd) & _PAGE_ACCESSED;
129130
}
130131

132+
/* Nonzero when _PAGE_DIRTY is set in the flags of @pud. */
static inline int pud_dirty(pud_t pud)
133+
{
134+
return pud_flags(pud) & _PAGE_DIRTY;
135+
}
136+
137+
/* Nonzero when _PAGE_ACCESSED is set in the flags of @pud. */
static inline int pud_young(pud_t pud)
138+
{
139+
return pud_flags(pud) & _PAGE_ACCESSED;
140+
}
141+
131142
static inline int pte_write(pte_t pte)
132143
{
133144
return pte_flags(pte) & _PAGE_RW;
@@ -181,6 +192,13 @@ static inline int pmd_trans_huge(pmd_t pmd)
181192
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
182193
}
183194

195+
/*
 * pud_trans_huge(): true when @pud has _PAGE_PSE set and _PAGE_DEVMAP clear.
 * Only defined when CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is enabled.
 */
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
196+
static inline int pud_trans_huge(pud_t pud)
197+
{
198+
return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
199+
}
200+
#endif
201+
184202
#define has_transparent_hugepage has_transparent_hugepage
185203
static inline int has_transparent_hugepage(void)
186204
{
@@ -192,6 +210,18 @@ static inline int pmd_devmap(pmd_t pmd)
192210
{
193211
return !!(pmd_val(pmd) & _PAGE_DEVMAP);
194212
}
213+
214+
/*
 * pud_devmap(): 1 if _PAGE_DEVMAP is set in @pud, else 0.
 * When CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is off it always returns 0.
 */
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
215+
static inline int pud_devmap(pud_t pud)
216+
{
217+
return !!(pud_val(pud) & _PAGE_DEVMAP);
218+
}
219+
#else
220+
static inline int pud_devmap(pud_t pud)
221+
{
222+
return 0;
223+
}
224+
#endif
195225
#endif
196226
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
197227

@@ -333,6 +363,65 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
333363
return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
334364
}
335365

366+
/* Return a copy of @pud with the bits in @set OR-ed into its raw value. */
static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
367+
{
368+
pudval_t v = native_pud_val(pud);
369+
370+
return __pud(v | set);
371+
}
372+
373+
/* Return a copy of @pud with the bits in @clear masked out of its raw value. */
static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
374+
{
375+
pudval_t v = native_pud_val(pud);
376+
377+
return __pud(v & ~clear);
378+
}
379+
380+
/* Return @pud with _PAGE_ACCESSED cleared. */
static inline pud_t pud_mkold(pud_t pud)
381+
{
382+
return pud_clear_flags(pud, _PAGE_ACCESSED);
383+
}
384+
385+
/* Return @pud with _PAGE_DIRTY cleared. */
static inline pud_t pud_mkclean(pud_t pud)
386+
{
387+
return pud_clear_flags(pud, _PAGE_DIRTY);
388+
}
389+
390+
/* Return @pud with _PAGE_RW cleared. */
static inline pud_t pud_wrprotect(pud_t pud)
391+
{
392+
return pud_clear_flags(pud, _PAGE_RW);
393+
}
394+
395+
/* Return @pud with both _PAGE_DIRTY and _PAGE_SOFT_DIRTY set. */
static inline pud_t pud_mkdirty(pud_t pud)
396+
{
397+
return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
398+
}
399+
400+
/* Return @pud with _PAGE_DEVMAP set. */
static inline pud_t pud_mkdevmap(pud_t pud)
401+
{
402+
return pud_set_flags(pud, _PAGE_DEVMAP);
403+
}
404+
405+
/* Return @pud with _PAGE_PSE set. */
static inline pud_t pud_mkhuge(pud_t pud)
406+
{
407+
return pud_set_flags(pud, _PAGE_PSE);
408+
}
409+
410+
/* Return @pud with _PAGE_ACCESSED set. */
static inline pud_t pud_mkyoung(pud_t pud)
411+
{
412+
return pud_set_flags(pud, _PAGE_ACCESSED);
413+
}
414+
415+
/* Return @pud with _PAGE_RW set. */
static inline pud_t pud_mkwrite(pud_t pud)
416+
{
417+
return pud_set_flags(pud, _PAGE_RW);
418+
}
419+
420+
/* Return @pud with both _PAGE_PRESENT and _PAGE_PROTNONE cleared. */
static inline pud_t pud_mknotpresent(pud_t pud)
421+
{
422+
return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE);
423+
}
424+
336425
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
337426
static inline int pte_soft_dirty(pte_t pte)
338427
{
@@ -344,6 +433,11 @@ static inline int pmd_soft_dirty(pmd_t pmd)
344433
return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
345434
}
346435

436+
/* Nonzero when _PAGE_SOFT_DIRTY is set in the flags of @pud. */
static inline int pud_soft_dirty(pud_t pud)
437+
{
438+
return pud_flags(pud) & _PAGE_SOFT_DIRTY;
439+
}
440+
347441
static inline pte_t pte_mksoft_dirty(pte_t pte)
348442
{
349443
return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
@@ -354,6 +448,11 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
354448
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
355449
}
356450

451+
/* Return @pud with _PAGE_SOFT_DIRTY set. */
static inline pud_t pud_mksoft_dirty(pud_t pud)
452+
{
453+
return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
454+
}
455+
357456
static inline pte_t pte_clear_soft_dirty(pte_t pte)
358457
{
359458
return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
@@ -364,6 +463,11 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
364463
return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
365464
}
366465

466+
/* Return @pud with _PAGE_SOFT_DIRTY cleared. */
static inline pud_t pud_clear_soft_dirty(pud_t pud)
467+
{
468+
return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
469+
}
470+
367471
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
368472

369473
/*
@@ -392,6 +496,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
392496
massage_pgprot(pgprot));
393497
}
394498

499+
/*
 * Build a PUD entry from @page_nr shifted left by PAGE_SHIFT (widened to
 * phys_addr_t first) OR-ed with massage_pgprot(@pgprot).
 */
static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
500+
{
501+
return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
502+
massage_pgprot(pgprot));
503+
}
504+
395505
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
396506
{
397507
pteval_t val = pte_val(pte);
@@ -771,6 +881,14 @@ static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
771881
return res;
772882
}
773883

884+
/*
 * Read the entry at @pudp, clear it with native_pud_clear(), and return the
 * value that was read. The read and the clear are two separate steps, not a
 * single exchange.
 */
static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
885+
{
886+
pud_t res = *pudp;
887+
888+
native_pud_clear(pudp);
889+
return res;
890+
}
891+
774892
static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
775893
pte_t *ptep , pte_t pte)
776894
{
@@ -783,6 +901,12 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
783901
native_set_pmd(pmdp, pmd);
784902
}
785903

904+
/* Store @pud at @pudp via native_set_pud(); @mm and @addr are not used. */
static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
905+
pud_t *pudp, pud_t pud)
906+
{
907+
native_set_pud(pudp, pud);
908+
}
909+
786910
#ifndef CONFIG_PARAVIRT
787911
/*
788912
* Rules for using pte_update - it must be called after any PTE update which
@@ -861,10 +985,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
861985
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
862986
unsigned long address, pmd_t *pmdp,
863987
pmd_t entry, int dirty);
988+
/*
 * Declaration only; the definition is not part of this header hunk.
 * Takes the same argument list as pmdp_set_access_flags(), with pud_t in
 * place of pmd_t.
 */
extern int pudp_set_access_flags(struct vm_area_struct *vma,
989+
unsigned long address, pud_t *pudp,
990+
pud_t entry, int dirty);
864991

865992
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
866993
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
867994
unsigned long addr, pmd_t *pmdp);
995+
/*
 * Declaration only; the definition is not part of this header hunk.
 * Takes the same argument list as pmdp_test_and_clear_young(), with pud_t
 * in place of pmd_t.
 */
extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
996+
unsigned long addr, pud_t *pudp);
868997

869998
#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
870999
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
@@ -884,6 +1013,13 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
8841013
return native_pmdp_get_and_clear(pmdp);
8851014
}
8861015

1016+
/*
 * Return the entry at @pudp and clear it by delegating to
 * native_pudp_get_and_clear(); @mm and @addr are not used.
 * __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR is defined alongside it.
 */
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
1017+
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
1018+
unsigned long addr, pud_t *pudp)
1019+
{
1020+
return native_pudp_get_and_clear(pudp);
1021+
}
1022+
8871023
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
8881024
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
8891025
unsigned long addr, pmd_t *pmdp)
@@ -932,6 +1068,10 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
9321068
unsigned long addr, pmd_t *pmd)
9331069
{
9341070
}
1071+
/* Empty hook: does nothing with @vma, @addr or @pud. */
static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
1072+
unsigned long addr, pud_t *pud)
1073+
{
1074+
}
9351075

9361076
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
9371077
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)

arch/x86/include/asm/pgtable_64.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,21 @@ static inline void native_pud_clear(pud_t *pud)
106106
native_set_pud(pud, native_make_pud(0));
107107
}
108108

109+
/*
 * Fetch the entry at @xp and zero it.
 * With CONFIG_SMP this is one xchg() on xp->pud; otherwise the entry is read
 * and then cleared with native_pud_clear() as two separate steps.
 */
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
110+
{
111+
#ifdef CONFIG_SMP
112+
return native_make_pud(xchg(&xp->pud, 0));
113+
#else
114+
/* native_local_pudp_get_and_clear,
115+
* but duplicated because of cyclic dependency
116+
*/
117+
pud_t ret = *xp;
118+
119+
native_pud_clear(xp);
120+
return ret;
121+
#endif
122+
}
123+
109124
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
110125
{
111126
*pgdp = pgd;

arch/x86/kernel/paravirt.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
425425
.pmd_clear = native_pmd_clear,
426426
#endif
427427
.set_pud = native_set_pud,
428+
.set_pud_at = native_set_pud_at,
428429

429430
.pmd_val = PTE_IDENT,
430431
.make_pmd = PTE_IDENT,

0 commit comments

Comments
 (0)