Skip to content

Commit 091a1ea

Browse files
committed
Merge branch 'akpm'
* akpm:
  mm: madvise(MADV_DODUMP): allow hugetlbfs pages
  ocfs2: fix locking for res->tracking and dlm->tracking_list
  mm/vmscan.c: fix int overflow in callers of do_shrink_slab()
  mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly
  mm/vmstat.c: fix outdated vmstat_text
  proc: restrict kernel stack dumps to root
  mm/hugetlb: add mmap() encodings for 32MB and 512MB page sizes
  mm/migrate.c: split only transparent huge pages when allocation fails
  ipc/shm.c: use ERR_CAST() for shm_lock() error return
  mm/gup_benchmark: fix unsigned comparison to zero in __gup_benchmark_ioctl
  mm, thp: fix mlocking THP page with migration enabled
  ocfs2: fix crash in ocfs2_duplicate_clusters_by_page()
  hugetlb: take PMD sharing into account when flushing tlb/caches
  mm: migration: fix migration of huge PMD shared pages
2 parents 5943a9b + d41aa52 commit 091a1ea

File tree

18 files changed

+189
-30
lines changed

18 files changed

+189
-30
lines changed

fs/ocfs2/dlm/dlmmaster.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -584,9 +584,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
584584

585585
res->last_used = 0;
586586

587-
spin_lock(&dlm->spinlock);
587+
spin_lock(&dlm->track_lock);
588588
list_add_tail(&res->tracking, &dlm->tracking_list);
589-
spin_unlock(&dlm->spinlock);
589+
spin_unlock(&dlm->track_lock);
590590

591591
memset(res->lvb, 0, DLM_LVB_LEN);
592592
memset(res->refmap, 0, sizeof(res->refmap));

fs/ocfs2/refcounttree.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
29462946
if (map_end & (PAGE_SIZE - 1))
29472947
to = map_end & (PAGE_SIZE - 1);
29482948

2949+
retry:
29492950
page = find_or_create_page(mapping, page_index, GFP_NOFS);
29502951
if (!page) {
29512952
ret = -ENOMEM;
@@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
29542955
}
29552956

29562957
/*
2957-
* In case PAGE_SIZE <= CLUSTER_SIZE, This page
2958-
* can't be dirtied before we CoW it out.
2958+
* In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty
2959+
* page, so write it back.
29592960
*/
2960-
if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize)
2961-
BUG_ON(PageDirty(page));
2961+
if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {
2962+
if (PageDirty(page)) {
2963+
/*
2964+
* write_one_page will unlock the page on return
2965+
*/
2966+
ret = write_one_page(page);
2967+
goto retry;
2968+
}
2969+
}
29622970

29632971
if (!PageUptodate(page)) {
29642972
ret = block_read_full_page(page, ocfs2_get_block);

fs/proc/base.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
407407
unsigned long *entries;
408408
int err;
409409

410+
/*
411+
* The ability to racily run the kernel stack unwinder on a running task
412+
* and then observe the unwinder output is scary; while it is useful for
413+
* debugging kernel issues, it can also allow an attacker to leak kernel
414+
* stack contents.
415+
* Doing this in a manner that is at least safe from races would require
416+
* some work to ensure that the remote task can not be scheduled; and
417+
* even then, this would still expose the unwinder as local attack
418+
* surface.
419+
* Therefore, this interface is restricted to root.
420+
*/
421+
if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
422+
return -EACCES;
423+
410424
entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
411425
GFP_KERNEL);
412426
if (!entries)

include/linux/hugetlb.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
140140
pte_t *huge_pte_offset(struct mm_struct *mm,
141141
unsigned long addr, unsigned long sz);
142142
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
143+
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
144+
unsigned long *start, unsigned long *end);
143145
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
144146
int write);
145147
struct page *follow_huge_pd(struct vm_area_struct *vma,
@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
170172
return 0;
171173
}
172174

175+
static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
176+
pte_t *ptep)
177+
{
178+
return 0;
179+
}
180+
181+
static inline void adjust_range_if_pmd_sharing_possible(
182+
struct vm_area_struct *vma,
183+
unsigned long *start, unsigned long *end)
184+
{
185+
}
186+
173187
#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
174188
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
175189
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })

include/linux/mm.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
24552455
return vma;
24562456
}
24572457

2458+
static inline bool range_in_vma(struct vm_area_struct *vma,
2459+
unsigned long start, unsigned long end)
2460+
{
2461+
return (vma && vma->vm_start <= start && end <= vma->vm_end);
2462+
}
2463+
24582464
#ifdef CONFIG_MMU
24592465
pgprot_t vm_get_page_prot(unsigned long vm_flags);
24602466
void vma_set_page_prot(struct vm_area_struct *vma);

include/uapi/asm-generic/hugetlb_encode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
2727
#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
2828
#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
29+
#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT)
2930
#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
31+
#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT)
3032
#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
3133
#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
3234
#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)

include/uapi/linux/memfd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
2626
#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
2727
#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
28+
#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
2829
#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
30+
#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
2931
#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
3032
#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
3133
#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB

include/uapi/linux/mman.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
2929
#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
3030
#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
31+
#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
3132
#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
33+
#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
3234
#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
3335
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
3436
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB

include/uapi/linux/shm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ struct shmid_ds {
6565
#define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
6666
#define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
6767
#define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
68+
#define SHM_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
6869
#define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
70+
#define SHM_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
6971
#define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
7072
#define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
7173
#define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB

ipc/shm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
206206
* Callers of shm_lock() must validate the status of the returned ipc
207207
* object pointer and error out as appropriate.
208208
*/
209-
return (void *)ipcp;
209+
return ERR_CAST(ipcp);
210210
}
211211

212212
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)

mm/gup_benchmark.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
1919
struct gup_benchmark *gup)
2020
{
2121
ktime_t start_time, end_time;
22-
unsigned long i, nr, nr_pages, addr, next;
22+
unsigned long i, nr_pages, addr, next;
23+
int nr;
2324
struct page **pages;
2425

2526
nr_pages = gup->size / PAGE_SIZE;

mm/huge_memory.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2931,7 +2931,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
29312931
else
29322932
page_add_file_rmap(new, true);
29332933
set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
2934-
if (vma->vm_flags & VM_LOCKED)
2934+
if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
29352935
mlock_vma_page(new);
29362936
update_mmu_cache_pmd(vma, address, pvmw->pmd);
29372937
}

mm/hugetlb.c

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
33263326
struct page *page;
33273327
struct hstate *h = hstate_vma(vma);
33283328
unsigned long sz = huge_page_size(h);
3329-
const unsigned long mmun_start = start; /* For mmu_notifiers */
3330-
const unsigned long mmun_end = end; /* For mmu_notifiers */
3329+
unsigned long mmun_start = start; /* For mmu_notifiers */
3330+
unsigned long mmun_end = end; /* For mmu_notifiers */
33313331

33323332
WARN_ON(!is_vm_hugetlb_page(vma));
33333333
BUG_ON(start & ~huge_page_mask(h));
@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
33393339
*/
33403340
tlb_remove_check_page_size_change(tlb, sz);
33413341
tlb_start_vma(tlb, vma);
3342+
3343+
/*
3344+
* If sharing possible, alert mmu notifiers of worst case.
3345+
*/
3346+
adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
33423347
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
33433348
address = start;
33443349
for (; address < end; address += sz) {
@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
33493354
ptl = huge_pte_lock(h, mm, ptep);
33503355
if (huge_pmd_unshare(mm, &address, ptep)) {
33513356
spin_unlock(ptl);
3357+
/*
3358+
* We just unmapped a page of PMDs by clearing a PUD.
3359+
* The caller's TLB flush range should cover this area.
3360+
*/
33523361
continue;
33533362
}
33543363

@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
34313440
{
34323441
struct mm_struct *mm;
34333442
struct mmu_gather tlb;
3443+
unsigned long tlb_start = start;
3444+
unsigned long tlb_end = end;
3445+
3446+
/*
3447+
* If shared PMDs were possibly used within this vma range, adjust
3448+
* start/end for worst case tlb flushing.
3449+
* Note that we can not be sure if PMDs are shared until we try to
3450+
* unmap pages. However, we want to make sure TLB flushing covers
3451+
* the largest possible range.
3452+
*/
3453+
adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
34343454

34353455
mm = vma->vm_mm;
34363456

3437-
tlb_gather_mmu(&tlb, mm, start, end);
3457+
tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
34383458
__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
3439-
tlb_finish_mmu(&tlb, start, end);
3459+
tlb_finish_mmu(&tlb, tlb_start, tlb_end);
34403460
}
34413461

34423462
/*
@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
42984318
pte_t pte;
42994319
struct hstate *h = hstate_vma(vma);
43004320
unsigned long pages = 0;
4321+
unsigned long f_start = start;
4322+
unsigned long f_end = end;
4323+
bool shared_pmd = false;
4324+
4325+
/*
4326+
* In the case of shared PMDs, the area to flush could be beyond
4327+
* start/end. Set f_start/f_end to cover the maximum possible
4328+
* range if PMD sharing is possible.
4329+
*/
4330+
adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
43014331

43024332
BUG_ON(address >= end);
4303-
flush_cache_range(vma, address, end);
4333+
flush_cache_range(vma, f_start, f_end);
43044334

4305-
mmu_notifier_invalidate_range_start(mm, start, end);
4335+
mmu_notifier_invalidate_range_start(mm, f_start, f_end);
43064336
i_mmap_lock_write(vma->vm_file->f_mapping);
43074337
for (; address < end; address += huge_page_size(h)) {
43084338
spinlock_t *ptl;
@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
43134343
if (huge_pmd_unshare(mm, &address, ptep)) {
43144344
pages++;
43154345
spin_unlock(ptl);
4346+
shared_pmd = true;
43164347
continue;
43174348
}
43184349
pte = huge_ptep_get(ptep);
@@ -4348,17 +4379,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
43484379
* Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
43494380
* may have cleared our pud entry and done put_page on the page table:
43504381
* once we release i_mmap_rwsem, another task can do the final put_page
4351-
* and that page table be reused and filled with junk.
4382+
* and that page table be reused and filled with junk. If we actually
4383+
* did unshare a page of pmds, flush the range corresponding to the pud.
43524384
*/
4353-
flush_hugetlb_tlb_range(vma, start, end);
4385+
if (shared_pmd)
4386+
flush_hugetlb_tlb_range(vma, f_start, f_end);
4387+
else
4388+
flush_hugetlb_tlb_range(vma, start, end);
43544389
/*
43554390
* No need to call mmu_notifier_invalidate_range() we are downgrading
43564391
* page table protection not changing it to point to a new page.
43574392
*
43584393
* See Documentation/vm/mmu_notifier.rst
43594394
*/
43604395
i_mmap_unlock_write(vma->vm_file->f_mapping);
4361-
mmu_notifier_invalidate_range_end(mm, start, end);
4396+
mmu_notifier_invalidate_range_end(mm, f_start, f_end);
43624397

43634398
return pages << h->order;
43644399
}
@@ -4545,12 +4580,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
45454580
/*
45464581
* check on proper vm_flags and page table alignment
45474582
*/
4548-
if (vma->vm_flags & VM_MAYSHARE &&
4549-
vma->vm_start <= base && end <= vma->vm_end)
4583+
if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
45504584
return true;
45514585
return false;
45524586
}
45534587

4588+
/*
4589+
* Determine if start,end range within vma could be mapped by shared pmd.
4590+
* If yes, adjust start and end to cover range associated with possible
4591+
* shared pmd mappings.
4592+
*/
4593+
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
4594+
unsigned long *start, unsigned long *end)
4595+
{
4596+
unsigned long check_addr = *start;
4597+
4598+
if (!(vma->vm_flags & VM_MAYSHARE))
4599+
return;
4600+
4601+
for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
4602+
unsigned long a_start = check_addr & PUD_MASK;
4603+
unsigned long a_end = a_start + PUD_SIZE;
4604+
4605+
/*
4606+
* If sharing is possible, adjust start/end if necessary.
4607+
*/
4608+
if (range_in_vma(vma, a_start, a_end)) {
4609+
if (a_start < *start)
4610+
*start = a_start;
4611+
if (a_end > *end)
4612+
*end = a_end;
4613+
}
4614+
}
4615+
}
4616+
45544617
/*
45554618
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
45564619
* and returns the corresponding pte. While this is not necessary for the
@@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
46484711
{
46494712
return 0;
46504713
}
4714+
4715+
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
4716+
unsigned long *start, unsigned long *end)
4717+
{
4718+
}
46514719
#define want_pmd_share() (0)
46524720
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
46534721

mm/madvise.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
9696
new_flags |= VM_DONTDUMP;
9797
break;
9898
case MADV_DODUMP:
99-
if (new_flags & VM_SPECIAL) {
99+
if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
100100
error = -EINVAL;
101101
goto out;
102102
}

mm/migrate.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
275275
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
276276
mlock_vma_page(new);
277277

278+
if (PageTransHuge(page) && PageMlocked(page))
279+
clear_page_mlock(page);
280+
278281
/* No need to invalidate - it was non-present before */
279282
update_mmu_cache(vma, pvmw.address, pvmw.pte);
280283
}
@@ -1411,7 +1414,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
14111414
* we encounter them after the rest of the list
14121415
* is processed.
14131416
*/
1414-
if (PageTransHuge(page)) {
1417+
if (PageTransHuge(page) && !PageHuge(page)) {
14151418
lock_page(page);
14161419
rc = split_huge_page_to_list(page, from);
14171420
unlock_page(page);

0 commit comments

Comments (0)