
Commit 58d4e45

Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "14 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes
  mm/vmalloc.c: don't unconditonally use __GFP_HIGHMEM
  mm/mempolicy: fix use after free when calling get_mempolicy
  mm/cma_debug.c: fix stack corruption due to sprintf usage
  signal: don't remove SIGNAL_UNKILLABLE for traced tasks.
  mm, oom: fix potential data corruption when oom_reaper races with writer
  mm: fix double mmap_sem unlock on MMF_UNSTABLE enforced SIGBUS
  slub: fix per memcg cache leak on css offline
  mm: discard memblock data later
  test_kmod: fix description for -s -and -c parameters
  kmod: fix wait on recursive loop
  wait: add wait_event_killable_timeout()
  kernel/watchdog: fix Kconfig constraints for perf hardlockup watchdog
  mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback()
2 parents cc28fcd + c715b72

22 files changed, +224 −103 lines changed


arch/arm64/include/asm/elf.h

Lines changed: 2 additions & 2 deletions
@@ -114,10 +114,10 @@
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
-#define ELF_ET_DYN_BASE	0x100000000UL
+#define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
 
 #ifndef __ASSEMBLY__
 
arch/powerpc/Kconfig

Lines changed: 1 addition & 1 deletion
@@ -199,7 +199,7 @@ config PPC
 	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
-	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE		if SMP

arch/x86/Kconfig

Lines changed: 1 addition & 1 deletion
@@ -163,7 +163,7 @@ config X86
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
-	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API

arch/x86/include/asm/elf.h

Lines changed: 2 additions & 2 deletions
@@ -247,11 +247,11 @@ extern int force_personality32;
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
 #define ELF_ET_DYN_BASE		(mmap_is_ia32() ? 0x000400000UL : \
-						  0x100000000UL)
+						  (TASK_SIZE / 3 * 2))
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports. This could be done in user space,
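
Both reverts replace the fixed 0x100000000UL (4GB) base with an expression that scales with the task size. As a rough sanity check of where the restored expressions land, here is a userspace sketch; it assumes arm64's default 48-bit VA (TASK_SIZE_64 = 1UL << 48) and x86-64 4-level paging (TASK_SIZE_MAX = (1UL << 47) - PAGE_SIZE), so other configurations will print different numbers:

#include <stdio.h>

int main(void)
{
	unsigned long arm64_task_size_64 = 1UL << 48;		/* assumes VA_BITS=48 */
	unsigned long x86_task_size = (1UL << 47) - 4096;	/* assumes 4-level paging */

	printf("arm64  base: %#lx\n", 2 * arm64_task_size_64 / 3);
	printf("x86-64 base: %#lx\n", x86_task_size / 3 * 2);
	printf("4GB boundary: %#lx\n", 1UL << 32);
	return 0;
}

The arm64 expression evaluates to 0xaaaaaaaaaaaa and the x86-64 one to a value in the 0x5555... range familiar from PIE binaries under a debugger (the binfmt loader page-aligns the final base). Both sit well above the 4GB boundary, which is what the reworded comments mean by "above 4GB".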

include/linux/memblock.h

Lines changed: 4 additions & 2 deletions
@@ -61,6 +61,7 @@ extern int memblock_debug;
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __meminit
 #define __initdata_memblock __meminitdata
+void memblock_discard(void);
 #else
 #define __init_memblock
 #define __initdata_memblock
@@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 					int nid, ulong flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
-phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
-phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
@@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
 void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
 				phys_addr_t *out_end);
 
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
 /**
  * for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
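
The new memblock_discard() replaces the two get_allocated_memblock_*_regions_info() helpers removed above: rather than exporting the array addresses for another subsystem to free, memblock now frees its own dynamically allocated region arrays (see the mm/memblock.c hunk below). A hedged sketch of the intended call, assuming it happens once early-boot allocations are finished; the actual call site is in one of the 22 changed files not reproduced in this excerpt, and the function name here is hypothetical:

void __init free_boot_metadata(void)	/* hypothetical caller */
{
	/* Frees the reserved/memory region arrays via
	 * __memblock_free_late() when they were allocated
	 * dynamically; a no-op for the static init arrays. */
	memblock_discard();
}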

include/linux/memcontrol.h

Lines changed: 8 additions & 2 deletions
@@ -484,7 +484,8 @@ bool mem_cgroup_oom_synchronize(bool wait);
 extern int do_swap_account;
 #endif
 
-void lock_page_memcg(struct page *page);
+struct mem_cgroup *lock_page_memcg(struct page *page);
+void __unlock_page_memcg(struct mem_cgroup *memcg);
 void unlock_page_memcg(struct page *page);
 
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
@@ -809,7 +810,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void lock_page_memcg(struct page *page)
+static inline struct mem_cgroup *lock_page_memcg(struct page *page)
+{
+	return NULL;
+}
+
+static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
 {
 }
 
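
This signature change is the heart of the test_clear_page_writeback() fix named in the merge message: the unlock side must not re-read page->mem_cgroup, which can be cleared while the page is still under writeback. A minimal sketch of the caller pattern the new API implies (hypothetical function name; the real caller fixed by this series is test_clear_page_writeback() in mm/page-writeback.c):

static void account_writeback_end(struct page *page)	/* hypothetical */
{
	struct mem_cgroup *memcg;

	memcg = lock_page_memcg(page);
	/* ... update writeback statistics against memcg ... */

	/* Unlock against the memcg we actually locked; dereferencing
	 * page->mem_cgroup again here could yield NULL. */
	__unlock_page_memcg(memcg);
}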

include/linux/oom.h

Lines changed: 22 additions & 0 deletions
@@ -6,6 +6,8 @@
 #include <linux/types.h>
 #include <linux/nodemask.h>
 #include <uapi/linux/oom.h>
+#include <linux/sched/coredump.h> /* MMF_* */
+#include <linux/mm.h> /* VM_FAULT* */
 
 struct zonelist;
 struct notifier_block;
@@ -63,6 +65,26 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk)
 	return tsk->signal->oom_mm;
 }
 
+/*
+ * Checks whether a page fault on the given mm is still reliable.
+ * This is no longer true if the oom reaper started to reap the
+ * address space which is reflected by MMF_UNSTABLE flag set in
+ * the mm. At that moment any !shared mapping would lose the content
+ * and could cause a memory corruption (zero pages instead of the
+ * original content).
+ *
+ * User should call this before establishing a page table entry for
+ * a !shared mapping and under the proper page table lock.
+ *
+ * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise.
+ */
+static inline int check_stable_address_space(struct mm_struct *mm)
+{
+	if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
+		return VM_FAULT_SIGBUS;
+	return 0;
+}
+
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
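
A minimal sketch of the calling convention the comment describes: check under the page table lock, and bail out with SIGBUS if the oom reaper may already have zapped the mm. This is hypothetical fault-handler code, not part of this commit; the real callers added by the series are in the mm/huge_memory.c hunks below:

static int pmd_install_checked(struct vm_fault *vmf, struct vm_area_struct *vma)
{
	int ret;

	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);	/* the proper page table lock */
	ret = check_stable_address_space(vma->vm_mm);
	if (ret) {					/* VM_FAULT_SIGBUS */
		spin_unlock(vmf->ptl);
		return ret;
	}
	/* ... safe to establish the entry for a !shared mapping ... */
	spin_unlock(vmf->ptl);
	return 0;
}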

include/linux/wait.h

Lines changed: 37 additions & 0 deletions
@@ -757,6 +757,43 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
 	__ret;									\
 })
 
+#define __wait_event_killable_timeout(wq_head, condition, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_KILLABLE, 0, timeout,				\
+		      __ret = schedule_timeout(__ret))
+
+/**
+ * wait_event_killable_timeout - sleep until a condition gets true or a timeout elapses
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_KILLABLE) until the
+ * @condition evaluates to true or a kill signal is received.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * Returns:
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
+ * interrupted by a kill signal.
+ *
+ * Only kill signals interrupt this process.
+ */
+#define wait_event_killable_timeout(wq_head, condition, timeout)		\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_killable_timeout(wq_head,			\
+						condition, timeout);		\
+	__ret;									\
+})
+
 
 #define __wait_event_lock_irq(wq_head, condition, lock, cmd)			\
 	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
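
A hedged usage sketch of the new helper, following the return-value contract documented above. The names my_wq and done are illustrative only (an assumed wait_queue_head_t and flag); the kernel/kmod.c hunk below is the first in-tree user:

static int wait_for_done(void)		/* hypothetical waiter */
{
	long ret;

	ret = wait_event_killable_timeout(my_wq, done != 0, 5 * HZ);
	if (ret == 0)			/* timed out, condition still false */
		return -ETIME;
	if (ret == -ERESTARTSYS)	/* a fatal signal interrupted the wait */
		return ret;
	return 0;			/* ret >= 1: condition became true */
}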

kernel/kmod.c

Lines changed: 23 additions & 2 deletions
@@ -70,6 +70,18 @@ static DECLARE_RWSEM(umhelper_sem);
 static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
 static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
 
+/*
+ * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
+ * running at the same time without returning. When this happens we
+ * believe you've somehow ended up with a recursive module dependency
+ * creating a loop.
+ *
+ * We have no option but to fail.
+ *
+ * Userspace should proactively try to detect and prevent these.
+ */
+#define MAX_KMOD_ALL_BUSY_TIMEOUT 5
+
 /*
 	modprobe_path is set via /proc/sys.
 */
@@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...)
 		pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
 				    atomic_read(&kmod_concurrent_max),
 				    MAX_KMOD_CONCURRENT, module_name);
-		wait_event_interruptible(kmod_wq,
-					 atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
+		ret = wait_event_killable_timeout(kmod_wq,
+						  atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
+						  MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
+		if (!ret) {
+			pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
+					    module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
+			return -ETIME;
+		} else if (ret == -ERESTARTSYS) {
+			pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
+			return ret;
+		}
 	}
 
 	trace_module_request(module_name, wait, _RET_IP_);
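
With this change request_module() can now fail with -ETIME when all MAX_KMOD_CONCURRENT kmod threads stay busy for the full timeout, so callers have one more error to cope with. A hedged sketch of such a caller (hypothetical driver code and module name, not part of this commit):

static int acme_probe(void)	/* hypothetical driver code */
{
	int err = request_module("acme-widget");	/* hypothetical module */

	if (err)	/* now possibly -ETIME, besides -ERESTARTSYS etc. */
		pr_warn("acme: modprobe failed (%d), continuing without it\n", err);
	return 0;
}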

kernel/signal.c

Lines changed: 5 additions & 1 deletion
@@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 			recalc_sigpending_and_wake(t);
 		}
 	}
-	if (action->sa.sa_handler == SIG_DFL)
+	/*
+	 * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
+	 * debugging to leave init killable.
+	 */
+	if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
 		t->signal->flags &= ~SIGNAL_UNKILLABLE;
 	ret = specific_send_sig_info(sig, info, t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);

mm/cma_debug.c

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ static void cma_debugfs_add_one(struct cma *cma, int idx)
 	char name[16];
 	int u32s;
 
-	sprintf(name, "cma-%s", cma->name);
+	scnprintf(name, sizeof(name), "cma-%s", cma->name);
 
 	tmp = debugfs_create_dir(name, cma_debugfs_root);
 
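
Why the one-liner matters, as a standalone userspace demonstration: name is only 16 bytes, the "cma-" prefix consumes 4 of them, and CMA region names (often derived from device names) can easily exceed the remaining 11 characters, so the unbounded sprintf() writes past the stack buffer. The bounded variant truncates and NUL-terminates instead. The region name below is hypothetical, and snprintf() stands in as the userspace analogue of scnprintf():

#include <stdio.h>

int main(void)
{
	char name[16];
	const char *cma_name = "a-rather-long-region-name";	/* hypothetical */

	/* sprintf(name, "cma-%s", cma_name) would write 30 bytes into the
	 * 16-byte stack buffer, i.e. the corruption this patch fixes. */
	snprintf(name, sizeof(name), "cma-%s", cma_name);
	printf("%s\n", name);	/* prints the truncated "cma-a-rather-lo" */
	return 0;
}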

mm/huge_memory.c

Lines changed: 22 additions & 8 deletions
@@ -32,6 +32,7 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
+#include <linux/oom.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -550,6 +551,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+	int ret = 0;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -561,9 +563,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	pgtable = pte_alloc_one(vma->vm_mm, haddr);
 	if (unlikely(!pgtable)) {
-		mem_cgroup_cancel_charge(page, memcg, true);
-		put_page(page);
-		return VM_FAULT_OOM;
+		ret = VM_FAULT_OOM;
+		goto release;
 	}
 
 	clear_huge_page(page, haddr, HPAGE_PMD_NR);
@@ -576,13 +577,14 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_none(*vmf->pmd))) {
-		spin_unlock(vmf->ptl);
-		mem_cgroup_cancel_charge(page, memcg, true);
-		put_page(page);
-		pte_free(vma->vm_mm, pgtable);
+		goto unlock_release;
 	} else {
 		pmd_t entry;
 
+		ret = check_stable_address_space(vma->vm_mm);
+		if (ret)
+			goto unlock_release;
+
 		/* Deliver the page fault to userland */
 		if (userfaultfd_missing(vma)) {
 			int ret;
@@ -610,6 +612,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 	}
 
 	return 0;
+unlock_release:
+	spin_unlock(vmf->ptl);
+release:
+	if (pgtable)
+		pte_free(vma->vm_mm, pgtable);
+	mem_cgroup_cancel_charge(page, memcg, true);
+	put_page(page);
+	return ret;
+
 }
 
 /*
@@ -688,7 +699,10 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		ret = 0;
 		set = false;
 		if (pmd_none(*vmf->pmd)) {
-			if (userfaultfd_missing(vma)) {
+			ret = check_stable_address_space(vma->vm_mm);
+			if (ret) {
+				spin_unlock(vmf->ptl);
+			} else if (userfaultfd_missing(vma)) {
 				spin_unlock(vmf->ptl);
 				ret = handle_userfault(vmf, VM_UFFD_MISSING);
 				VM_BUG_ON(ret & VM_FAULT_FALLBACK);

mm/memblock.c

Lines changed: 17 additions & 21 deletions
@@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 }
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-
-phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
-					phys_addr_t *addr)
-{
-	if (memblock.reserved.regions == memblock_reserved_init_regions)
-		return 0;
-
-	*addr = __pa(memblock.reserved.regions);
-
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.reserved.max);
-}
-
-phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
-					phys_addr_t *addr)
+/**
+ * Discard memory and reserved arrays if they were allocated
+ */
+void __init memblock_discard(void)
 {
-	if (memblock.memory.regions == memblock_memory_init_regions)
-		return 0;
+	phys_addr_t addr, size;
 
-	*addr = __pa(memblock.memory.regions);
+	if (memblock.reserved.regions != memblock_reserved_init_regions) {
+		addr = __pa(memblock.reserved.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.reserved.max);
+		__memblock_free_late(addr, size);
+	}
 
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.memory.max);
+	if (memblock.memory.regions == memblock_memory_init_regions) {
+		addr = __pa(memblock.memory.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.memory.max);
+		__memblock_free_late(addr, size);
+	}
 }
-
 #endif
 
 /**
