Skip to content

Commit b569bab

Browse files
kirylIngo Molnar
authored andcommitted
x86/mm: Prepare to expose larger address space to userspace
On x86, 5-level paging enables a 56-bit userspace virtual address space. Not all user space is ready to handle wide addresses. It's known that at least some JIT compilers use the higher bits in pointers to encode their own information. This collides with valid pointers under 5-level paging and leads to crashes. To mitigate this, we are not going to allocate virtual address space above 47-bit by default. But userspace can ask for allocation from the full address space by specifying a hint address (with or without MAP_FIXED) above 47-bit. If the hint address is set above 47-bit, but MAP_FIXED is not specified, we try to look for an unmapped area at the specified address. If it's already occupied, we look for an unmapped area in the *full* address space, rather than in the 47-bit window. A high hint address only affects the allocation in question, not any future mmap()s. Specifying a high hint address on an older kernel or on a machine without 5-level paging support is safe. The hint will be ignored and the kernel will fall back to allocation from the 47-bit address space. This approach makes it easy to make an application's memory allocator aware of the large address space without manually tracking allocated virtual address space. The patch puts all the machinery in place, but does not yet allow userspace to have mappings above 47-bit -- TASK_SIZE_MAX has to be raised to get the effect. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-arch@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170716225954.74185-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 44b0491 commit b569bab

File tree

5 files changed

+43
-14
lines changed

5 files changed

+43
-14
lines changed

arch/x86/include/asm/elf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ static inline int mmap_is_ia32(void)
305305
}
306306

307307
extern unsigned long task_size_32bit(void);
308-
extern unsigned long task_size_64bit(void);
308+
extern unsigned long task_size_64bit(int full_addr_space);
309309
extern unsigned long get_mmap_base(int is_legacy);
310310

311311
#ifdef CONFIG_X86_32

arch/x86/include/asm/processor.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,7 @@ static inline void spin_lock_prefetch(const void *x)
808808
*/
809809
#define IA32_PAGE_OFFSET PAGE_OFFSET
810810
#define TASK_SIZE PAGE_OFFSET
811+
#define TASK_SIZE_LOW TASK_SIZE
811812
#define TASK_SIZE_MAX TASK_SIZE
812813
#define DEFAULT_MAP_WINDOW TASK_SIZE
813814
#define STACK_TOP TASK_SIZE
@@ -859,12 +860,14 @@ static inline void spin_lock_prefetch(const void *x)
859860
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
860861
0xc0000000 : 0xFFFFe000)
861862

863+
#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
864+
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
862865
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
863866
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
864867
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
865868
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
866869

867-
#define STACK_TOP TASK_SIZE
870+
#define STACK_TOP TASK_SIZE_LOW
868871
#define STACK_TOP_MAX TASK_SIZE_MAX
869872

870873
#define INIT_THREAD { \
@@ -885,7 +888,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
885888
* space during mmap's.
886889
*/
887890
#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
888-
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
891+
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
889892

890893
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
891894

arch/x86/kernel/sys_x86_64.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
101101
return error;
102102
}
103103

104-
static void find_start_end(unsigned long flags, unsigned long *begin,
105-
unsigned long *end)
104+
static void find_start_end(unsigned long addr, unsigned long flags,
105+
unsigned long *begin, unsigned long *end)
106106
{
107107
if (!in_compat_syscall() && (flags & MAP_32BIT)) {
108108
/* This is usually used needed to map code in small
@@ -121,7 +121,10 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
121121
}
122122

123123
*begin = get_mmap_base(1);
124-
*end = in_compat_syscall() ? task_size_32bit() : task_size_64bit();
124+
if (in_compat_syscall())
125+
*end = task_size_32bit();
126+
else
127+
*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
125128
}
126129

127130
unsigned long
@@ -140,7 +143,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
140143
if (flags & MAP_FIXED)
141144
return addr;
142145

143-
find_start_end(flags, &begin, &end);
146+
find_start_end(addr, flags, &begin, &end);
144147

145148
if (len > end)
146149
return -ENOMEM;
@@ -204,6 +207,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
204207
info.length = len;
205208
info.low_limit = PAGE_SIZE;
206209
info.high_limit = get_mmap_base(0);
210+
211+
/*
212+
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
213+
* in the full address space.
214+
*
215+
* !in_compat_syscall() check to avoid high addresses for x32.
216+
*/
217+
if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
218+
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
219+
207220
info.align_mask = 0;
208221
info.align_offset = pgoff << PAGE_SHIFT;
209222
if (filp) {

arch/x86/mm/hugetlbpage.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,25 +86,38 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
8686
info.flags = 0;
8787
info.length = len;
8888
info.low_limit = get_mmap_base(1);
89+
90+
/*
91+
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
92+
* in the full address space.
93+
*/
8994
info.high_limit = in_compat_syscall() ?
90-
task_size_32bit() : task_size_64bit();
95+
task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
96+
9197
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
9298
info.align_offset = 0;
9399
return vm_unmapped_area(&info);
94100
}
95101

96102
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
97-
unsigned long addr0, unsigned long len,
103+
unsigned long addr, unsigned long len,
98104
unsigned long pgoff, unsigned long flags)
99105
{
100106
struct hstate *h = hstate_file(file);
101107
struct vm_unmapped_area_info info;
102-
unsigned long addr;
103108

104109
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
105110
info.length = len;
106111
info.low_limit = PAGE_SIZE;
107112
info.high_limit = get_mmap_base(0);
113+
114+
/*
115+
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
116+
* in the full address space.
117+
*/
118+
if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
119+
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
120+
108121
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
109122
info.align_offset = 0;
110123
addr = vm_unmapped_area(&info);
@@ -119,7 +132,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
119132
VM_BUG_ON(addr != -ENOMEM);
120133
info.flags = 0;
121134
info.low_limit = TASK_UNMAPPED_BASE;
122-
info.high_limit = TASK_SIZE;
135+
info.high_limit = TASK_SIZE_LOW;
123136
addr = vm_unmapped_area(&info);
124137
}
125138

arch/x86/mm/mmap.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ unsigned long task_size_32bit(void)
4242
return IA32_PAGE_OFFSET;
4343
}
4444

45-
unsigned long task_size_64bit(void)
45+
unsigned long task_size_64bit(int full_addr_space)
4646
{
47-
return TASK_SIZE_MAX;
47+
return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
4848
}
4949

5050
static unsigned long stack_maxrandom_size(unsigned long task_size)
@@ -142,7 +142,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
142142
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
143143

144144
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
145-
arch_rnd(mmap64_rnd_bits), task_size_64bit());
145+
arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
146146

147147
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
148148
/*

0 commit comments

Comments
 (0)