
Commit 86fbf16

Merge branch 'akpm' (incoming from Andrew)
Merge patches from Andrew Morton: "23 fixes and a MAINTAINERS update"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (24 commits)
  mm/hugetlb: check for pte NULL pointer in __page_check_address()
  fix build with make 3.80
  mm/mempolicy: fix !vma in new_vma_page()
  MAINTAINERS: add Davidlohr as GPT maintainer
  mm/memory-failure.c: recheck PageHuge() after hugetlb page migrate successfully
  mm/compaction: respect ignore_skip_hint in update_pageblock_skip
  mm/mempolicy: correct putback method for isolate pages if failed
  mm: add missing dependency in Kconfig
  sh: always link in helper functions extracted from libgcc
  mm: page_alloc: exclude unreclaimable allocations from zone fairness policy
  mm: numa: defer TLB flush for THP migration as long as possible
  mm: numa: guarantee that tlb_flush_pending updates are visible before page table updates
  mm: fix TLB flush race between migration, and change_protection_range
  mm: numa: avoid unnecessary disruption of NUMA hinting during migration
  mm: numa: clear numa hinting information on mprotect
  sched: numa: skip inaccessible VMAs
  mm: numa: avoid unnecessary work on the failure path
  mm: numa: ensure anon_vma is locked to prevent parallel THP splits
  mm: numa: do not clear PTE for pte_numa update
  mm: numa: do not clear PMD during PTE update scan
  ...
2 parents a36c160 + 98398c3 commit 86fbf16

File tree: 24 files changed, +249 / -57 lines

MAINTAINERS

Lines changed: 6 additions & 0 deletions
@@ -3833,6 +3833,12 @@ T: git git://linuxtv.org/media_tree.git
 S: Maintained
 F: drivers/media/usb/gspca/
 
+GUID PARTITION TABLE (GPT)
+M: Davidlohr Bueso <davidlohr@hp.com>
+L: linux-efi@vger.kernel.org
+S: Maintained
+F: block/partitions/efi.*
+
 STK1160 USB VIDEO CAPTURE DRIVER
 M: Ezequiel Garcia <elezegarcia@gmail.com>
 L: linux-media@vger.kernel.org

Makefile

Lines changed: 7 additions & 13 deletions
@@ -732,19 +732,13 @@ export mod_strip_cmd
 # Select initial ramdisk compression format, default is gzip(1).
 # This shall be used by the dracut(8) tool while creating an initramfs image.
 #
-INITRD_COMPRESS=gzip
-ifeq ($(CONFIG_RD_BZIP2), y)
-INITRD_COMPRESS=bzip2
-else ifeq ($(CONFIG_RD_LZMA), y)
-INITRD_COMPRESS=lzma
-else ifeq ($(CONFIG_RD_XZ), y)
-INITRD_COMPRESS=xz
-else ifeq ($(CONFIG_RD_LZO), y)
-INITRD_COMPRESS=lzo
-else ifeq ($(CONFIG_RD_LZ4), y)
-INITRD_COMPRESS=lz4
-endif
-export INITRD_COMPRESS
+INITRD_COMPRESS-y := gzip
+INITRD_COMPRESS-$(CONFIG_RD_BZIP2) := bzip2
+INITRD_COMPRESS-$(CONFIG_RD_LZMA) := lzma
+INITRD_COMPRESS-$(CONFIG_RD_XZ) := xz
+INITRD_COMPRESS-$(CONFIG_RD_LZO) := lzo
+INITRD_COMPRESS-$(CONFIG_RD_LZ4) := lz4
+export INITRD_COMPRESS := $(INITRD_COMPRESS-y)
 
 ifdef CONFIG_MODULE_SIG_ALL
 MODSECKEY = ./signing_key.priv

arch/sh/lib/Makefile

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ lib-y = delay.o memmove.o memchr.o \
 	 checksum.o strlen.o div64.o div64-generic.o
 
 # Extracted from libgcc
-lib-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
+obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
 	 ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \
 	 udiv_qrnnd.o

arch/sparc/include/asm/pgtable_64.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ static inline unsigned long pte_present(pte_t pte)
619619
}
620620

621621
#define pte_accessible pte_accessible
622-
static inline unsigned long pte_accessible(pte_t a)
622+
static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
623623
{
624624
return pte_val(a) & _PAGE_VALID;
625625
}
@@ -847,7 +847,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
847847
* SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
848848
* and SUN4V pte layout, so this inline test is fine.
849849
*/
850-
if (likely(mm != &init_mm) && pte_accessible(orig))
850+
if (likely(mm != &init_mm) && pte_accessible(mm, orig))
851851
tlb_batch_add(mm, addr, ptep, orig, fullmm);
852852
}
853853

arch/x86/include/asm/pgtable.h

Lines changed: 9 additions & 2 deletions
@@ -452,9 +452,16 @@ static inline int pte_present(pte_t a)
 }
 
 #define pte_accessible pte_accessible
-static inline int pte_accessible(pte_t a)
+static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 {
-        return pte_flags(a) & _PAGE_PRESENT;
+        if (pte_flags(a) & _PAGE_PRESENT)
+                return true;
+
+        if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
+                        mm_tlb_flush_pending(mm))
+                return true;
+
+        return false;
 }
 
 static inline int pte_hidden(pte_t pte)
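
The reason pte_accessible() now takes the mm is that a PROT_NONE/NUMA-hinting PTE may still have live TLB entries on other CPUs while a batched protection change has not yet flushed. Callers deciding whether a TLB flush is needed therefore have to consult the mm-wide tlb_flush_pending state as well. Below is a minimal sketch of such a caller, loosely modelled on the generic ptep_clear_flush() path; the helper name and surrounding context are illustrative, not the exact upstream code.

/*
 * Illustrative only: clear a PTE and flush the TLB when the old entry may
 * still be cached.  With the mm-aware signature, pte_accessible() also
 * returns true for PROT_NONE/NUMA ptes while a batched flush is pending,
 * so the race against migration is closed by this flush.
 */
static pte_t example_clear_and_flush(struct vm_area_struct *vma,
                                     unsigned long address, pte_t *ptep)
{
        struct mm_struct *mm = vma->vm_mm;
        pte_t pte = ptep_get_and_clear(mm, address, ptep);

        if (pte_accessible(mm, pte))
                flush_tlb_page(vma, address);   /* stale entry may be cached */

        return pte;
}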

arch/x86/mm/gup.c

Lines changed: 13 additions & 0 deletions
@@ -83,6 +83,12 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                 pte_t pte = gup_get_pte(ptep);
                 struct page *page;
 
+                /* Similar to the PMD case, NUMA hinting must take slow path */
+                if (pte_numa(pte)) {
+                        pte_unmap(ptep);
+                        return 0;
+                }
+
                 if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
                         pte_unmap(ptep);
                         return 0;
@@ -167,6 +173,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                 if (pmd_none(pmd) || pmd_trans_splitting(pmd))
                         return 0;
                 if (unlikely(pmd_large(pmd))) {
+                        /*
+                         * NUMA hinting faults need to be handled in the GUP
+                         * slowpath for accounting purposes and so that they
+                         * can be serialised against THP migration.
+                         */
+                        if (pmd_numa(pmd))
+                                return 0;
                         if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
                                 return 0;
                 } else {
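
Bailing out with 0 from the lockless walk works because the fast-GUP caller treats a short count as a cue to retry the remainder through the regular, sleeping GUP path, where NUMA hinting faults are accounted and serialised against THP migration. A rough, hypothetical sketch of that caller-side fallback pattern (example_fast_walk() stands in for the lockless per-page walk above and is not a real kernel function):

/*
 * Hypothetical caller-side pattern: anything the lockless walk refuses
 * (such as a pte_numa/pmd_numa entry) is retried via the slow path.
 */
int example_get_user_pages_fast(unsigned long start, int nr_pages,
                                int write, struct page **pages)
{
        int nr = example_fast_walk(start, nr_pages, write, pages);
        long ret;

        if (nr == nr_pages)
                return nr;              /* fully resolved locklessly */

        /* Slow path: may sleep, handles NUMA hinting and THP migration */
        ret = get_user_pages(current, current->mm,
                             start + ((unsigned long)nr << PAGE_SHIFT),
                             nr_pages - nr, write, 0, pages + nr, NULL);

        return ret < 0 ? (nr ? nr : ret) : nr + ret;
}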

include/asm-generic/pgtable.h

Lines changed: 1 addition & 1 deletion
@@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #endif
 
 #ifndef pte_accessible
-# define pte_accessible(pte)            ((void)(pte),1)
+# define pte_accessible(mm, pte)        ((void)(pte), 1)
 #endif
 
 #ifndef flush_tlb_fix_spurious_fault

include/linux/migrate.h

Lines changed: 9 additions & 0 deletions
@@ -90,10 +90,19 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
+extern bool pmd_trans_migrating(pmd_t pmd);
+extern void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd);
 extern int migrate_misplaced_page(struct page *page,
                                   struct vm_area_struct *vma, int node);
 extern bool migrate_ratelimited(int node);
 #else
+static inline bool pmd_trans_migrating(pmd_t pmd)
+{
+        return false;
+}
+static inline void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+}
 static inline int migrate_misplaced_page(struct page *page,
                                          struct vm_area_struct *vma, int node)
 {

include/linux/mm_types.h

Lines changed: 49 additions & 0 deletions
@@ -442,6 +442,14 @@ struct mm_struct {
 
         /* numa_scan_seq prevents two threads setting pte_numa */
         int numa_scan_seq;
+#endif
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+        /*
+         * An operation with batched TLB flushing is going on. Anything that
+         * can move process memory needs to flush the TLB when moving a
+         * PROT_NONE or PROT_NUMA mapped page.
+         */
+        bool tlb_flush_pending;
 #endif
         struct uprobes_state uprobes_state;
 };
@@ -459,4 +467,45 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
         return mm->cpu_vm_mask_var;
 }
 
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+/*
+ * Memory barriers to keep this state in sync are graciously provided by
+ * the page table locks, outside of which no page table modifications happen.
+ * The barriers below prevent the compiler from re-ordering the instructions
+ * around the memory barriers that are already present in the code.
+ */
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+        barrier();
+        return mm->tlb_flush_pending;
+}
+static inline void set_tlb_flush_pending(struct mm_struct *mm)
+{
+        mm->tlb_flush_pending = true;
+
+        /*
+         * Guarantee that the tlb_flush_pending store does not leak into the
+         * critical section updating the page tables
+         */
+        smp_mb__before_spinlock();
+}
+/* Clearing is done after a TLB flush, which also provides a barrier. */
+static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+{
+        barrier();
+        mm->tlb_flush_pending = false;
+}
+#else
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+        return false;
+}
+static inline void set_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+#endif
+
 #endif /* _LINUX_MM_TYPES_H */
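
These helpers describe a protocol rather than standing alone: whoever batches PTE protection changes sets the flag before touching page tables, performs the TLB flush, and clears the flag only afterwards, so that pte_accessible(mm, pte) can spot the window in which stale TLB entries may still exist. In the series the set/clear calls live inside the mprotect/NUMA-scanning path; the standalone sketch below only illustrates the required ordering, and its function name and locking context are made up.

/*
 * Illustrative ordering for a batched protection change.  The flag is
 * raised before any PTE is modified (smp_mb__before_spinlock() orders the
 * store) and lowered only after the TLB flush has completed.
 */
static void example_batched_protect(struct vm_area_struct *vma,
                                    unsigned long start, unsigned long end,
                                    pgprot_t newprot)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long pages;

        set_tlb_flush_pending(mm);      /* visible before PTE updates */
        pages = change_protection(vma, start, end, newprot, 0, 0);
        if (pages)
                flush_tlb_range(vma, start, end);
        clear_tlb_flush_pending(mm);    /* only after the flush */
}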

include/linux/reboot.h

Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@ extern int unregister_reboot_notifier(struct notifier_block *);
  * Architecture-specific implementations of sys_reboot commands.
  */
 
+extern void migrate_to_reboot_cpu(void);
 extern void machine_restart(char *cmd);
 extern void machine_halt(void);
 extern void machine_power_off(void);

kernel/fork.c

Lines changed: 1 addition & 0 deletions
@@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
         spin_lock_init(&mm->page_table_lock);
         mm_init_aio(mm);
         mm_init_owner(mm, p);
+        clear_tlb_flush_pending(mm);
 
         if (likely(!mm_alloc_pgd(mm))) {
                 mm->def_flags = 0;

kernel/kexec.c

Lines changed: 1 addition & 0 deletions
@@ -1680,6 +1680,7 @@ int kernel_kexec(void)
         {
                 kexec_in_progress = true;
                 kernel_restart_prepare(NULL);
+                migrate_to_reboot_cpu();
                 printk(KERN_EMERG "Starting new kernel\n");
                 machine_shutdown();
         }

kernel/reboot.c

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ int unregister_reboot_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
-static void migrate_to_reboot_cpu(void)
+void migrate_to_reboot_cpu(void)
 {
         /* The boot cpu is always logical cpu 0 */
         int cpu = reboot_cpu;

kernel/sched/fair.c

Lines changed: 7 additions & 0 deletions
@@ -1738,6 +1738,13 @@ void task_numa_work(struct callback_head *work)
                     (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
                         continue;
 
+                /*
+                 * Skip inaccessible VMAs to avoid any confusion between
+                 * PROT_NONE and NUMA hinting ptes
+                 */
+                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+                        continue;
+
                 do {
                         start = max(start, vma->vm_start);
                         end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);

mm/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ config ZSWAP
 
 config MEM_SOFT_DIRTY
         bool "Track memory changes"
-        depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY
+        depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
         select PROC_PAGE_MONITOR
         help
           This option enables memory changes tracking by introducing a

mm/compaction.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@ static void update_pageblock_skip(struct compact_control *cc,
134134
bool migrate_scanner)
135135
{
136136
struct zone *zone = cc->zone;
137+
138+
if (cc->ignore_skip_hint)
139+
return;
140+
137141
if (!page)
138142
return;
139143

mm/huge_memory.c

Lines changed: 36 additions & 9 deletions
@@ -882,6 +882,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 ret = 0;
                 goto out_unlock;
         }
+
+        /* mmap_sem prevents this happening but warn if that changes */
+        WARN_ON(pmd_trans_migrating(pmd));
+
         if (unlikely(pmd_trans_splitting(pmd))) {
                 /* split huge page running from under us */
                 spin_unlock(src_ptl);
@@ -1243,6 +1247,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
         if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
                 return ERR_PTR(-EFAULT);
 
+        /* Full NUMA hinting faults to serialise migration in fault paths */
+        if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+                goto out;
+
         page = pmd_page(*pmd);
         VM_BUG_ON(!PageHead(page));
         if (flags & FOLL_TOUCH) {
@@ -1295,6 +1303,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (unlikely(!pmd_same(pmd, *pmdp)))
                 goto out_unlock;
 
+        /*
+         * If there are potential migrations, wait for completion and retry
+         * without disrupting NUMA hinting information. Do not relock and
+         * check_same as the page may no longer be mapped.
+         */
+        if (unlikely(pmd_trans_migrating(*pmdp))) {
+                spin_unlock(ptl);
+                wait_migrate_huge_page(vma->anon_vma, pmdp);
+                goto out;
+        }
+
         page = pmd_page(pmd);
         BUG_ON(is_huge_zero_page(page));
         page_nid = page_to_nid(page);
@@ -1323,23 +1342,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 /* If the page was locked, there are no parallel migrations */
                 if (page_locked)
                         goto clear_pmdnuma;
+        }
 
-        /*
-         * Otherwise wait for potential migrations and retry. We do
-         * relock and check_same as the page may no longer be mapped.
-         * As the fault is being retried, do not account for it.
-         */
+        /* Migration could have started since the pmd_trans_migrating check */
+        if (!page_locked) {
                 spin_unlock(ptl);
                 wait_on_page_locked(page);
                 page_nid = -1;
                 goto out;
         }
 
-        /* Page is misplaced, serialise migrations and parallel THP splits */
+        /*
+         * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+         * to serialises splits
+         */
         get_page(page);
         spin_unlock(ptl);
-        if (!page_locked)
-                lock_page(page);
         anon_vma = page_lock_anon_vma_read(page);
 
         /* Confirm the PMD did not change while page_table_lock was released */
@@ -1351,6 +1369,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 goto out_unlock;
         }
 
+        /* Bail if we fail to protect against THP splits for any reason */
+        if (unlikely(!anon_vma)) {
+                put_page(page);
+                page_nid = -1;
+                goto clear_pmdnuma;
+        }
+
         /*
          * Migrate the THP to the requested node, returns with page unlocked
          * and pmd_numa cleared.
@@ -1517,6 +1542,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                 ret = 1;
                 if (!prot_numa) {
                         entry = pmdp_get_and_clear(mm, addr, pmd);
+                        if (pmd_numa(entry))
+                                entry = pmd_mknonnuma(entry);
                         entry = pmd_modify(entry, newprot);
                         ret = HPAGE_PMD_NR;
                         BUG_ON(pmd_write(entry));
@@ -1531,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                  */
                 if (!is_huge_zero_page(page) &&
                     !pmd_numa(*pmd)) {
-                        entry = pmdp_get_and_clear(mm, addr, pmd);
+                        entry = *pmd;
                         entry = pmd_mknuma(entry);
                         ret = HPAGE_PMD_NR;
                 }

mm/memory-failure.c

Lines changed: 10 additions & 4 deletions
@@ -1505,10 +1505,16 @@ static int soft_offline_huge_page(struct page *page, int flags)
                 if (ret > 0)
                         ret = -EIO;
         } else {
-                set_page_hwpoison_huge_page(hpage);
-                dequeue_hwpoisoned_huge_page(hpage);
-                atomic_long_add(1 << compound_order(hpage),
-                                &num_poisoned_pages);
+                /* overcommit hugetlb page will be freed to buddy */
+                if (PageHuge(page)) {
+                        set_page_hwpoison_huge_page(hpage);
+                        dequeue_hwpoisoned_huge_page(hpage);
+                        atomic_long_add(1 << compound_order(hpage),
+                                        &num_poisoned_pages);
+                } else {
+                        SetPageHWPoison(page);
+                        atomic_long_inc(&num_poisoned_pages);
+                }
         }
         return ret;
 }
