Skip to content

Commit 806fdcc

Browse files
committed
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
"Misc fixes: a binutils fix, an lguest fix, an mcelog fix and a missing documentation fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  - x86/mce: Avoid using object after free in genpool
  - lguest, x86/entry/32: Fix handling of guest syscalls using interrupt gates
  - x86/build: Build compressed x86 kernels as PIE
  - x86/mm/pkeys: Add missing Documentation
2 parents a1f9831 + a312549 commit 806fdcc

File tree

8 files changed

+89
-5
lines changed

8 files changed

+89
-5
lines changed

Documentation/x86/protection-keys.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature
2+
which will be found on future Intel CPUs.
3+
4+
Memory Protection Keys provides a mechanism for enforcing page-based
5+
protections, but without requiring modification of the page tables
6+
when an application changes protection domains. It works by
7+
dedicating 4 previously ignored bits in each page table entry to a
8+
"protection key", giving 16 possible keys.
9+
10+
There is also a new user-accessible register (PKRU) with two separate
11+
bits (Access Disable and Write Disable) for each key. Being a CPU
12+
register, PKRU is inherently thread-local, potentially giving each
13+
thread a different set of protections from every other thread.
14+
15+
There are two new instructions (RDPKRU/WRPKRU) for reading and writing
16+
to the new register. The feature is only available in 64-bit mode,
17+
even though there is theoretically space in the PAE PTEs. These
18+
permissions are enforced on data access only and have no effect on
19+
instruction fetches.
20+
21+
=========================== Config Option ===========================
22+
23+
This config option adds approximately 1.5kb of text and 50 bytes of
24+
data to the executable. A workload which does large O_DIRECT reads
25+
of holes in XFS files was run to exercise get_user_pages_fast(). No
26+
performance delta was observed with the config option
27+
enabled or disabled.

arch/x86/boot/compressed/Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
2626
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
2727

2828
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
29-
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
29+
KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
3030
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
3131
cflags-$(CONFIG_X86_32) := -march=i386
3232
cflags-$(CONFIG_X86_64) := -mcmodel=small
@@ -40,6 +40,18 @@ GCOV_PROFILE := n
4040
UBSAN_SANITIZE :=n
4141

4242
LDFLAGS := -m elf_$(UTS_MACHINE)
43+
ifeq ($(CONFIG_RELOCATABLE),y)
44+
# If kernel is relocatable, build compressed kernel as PIE.
45+
ifeq ($(CONFIG_X86_32),y)
46+
LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
47+
else
48+
# To build 64-bit compressed kernel as PIE, we disable relocation
49+
# overflow check to avoid relocation overflow error with a new linker
50+
# command-line option, -z noreloc-overflow.
51+
LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
52+
&& echo "-z noreloc-overflow -pie --no-dynamic-linker")
53+
endif
54+
endif
4355
LDFLAGS_vmlinux := -T
4456

4557
hostprogs-y := mkpiggy

arch/x86/boot/compressed/head_32.S

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,34 @@
3131
#include <asm/asm-offsets.h>
3232
#include <asm/bootparam.h>
3333

34+
/*
35+
* The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X
36+
* relocation to get the symbol address in PIC. When the compressed x86
37+
* kernel isn't built as PIC, the linker optimizes R_386_GOT32X
38+
* relocations to their fixed symbol addresses. However, when the
39+
* compressed x86 kernel is loaded at a different address, it leads
40+
* to the following load failure:
41+
*
42+
* Failed to allocate space for phdrs
43+
*
44+
* during the decompression stage.
45+
*
46+
* If the compressed x86 kernel is relocatable at run-time, it should be
47+
* compiled with -fPIE, instead of -fPIC, if possible and should be built as
48+
* Position Independent Executable (PIE) so that linker won't optimize
49+
* R_386_GOT32X relocation to its fixed symbol address. Older
50+
* linkers generate R_386_32 relocations against locally defined symbols,
51+
* _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less
52+
* optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
53+
* R_386_32 relocations when relocating the kernel. To generate
54+
* R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as
55+
* hidden:
56+
*/
57+
.hidden _bss
58+
.hidden _ebss
59+
.hidden _got
60+
.hidden _egot
61+
3462
__HEAD
3563
ENTRY(startup_32)
3664
#ifdef CONFIG_EFI_STUB

arch/x86/boot/compressed/head_64.S

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@
3333
#include <asm/asm-offsets.h>
3434
#include <asm/bootparam.h>
3535

36+
/*
37+
* Locally defined symbols should be marked hidden:
38+
*/
39+
.hidden _bss
40+
.hidden _ebss
41+
.hidden _got
42+
.hidden _egot
43+
3644
__HEAD
3745
.code32
3846
ENTRY(startup_32)

arch/x86/kernel/cpu/mcheck/mce-genpool.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,15 @@ static char gen_pool_buf[MCE_POOLSZ];
2929
void mce_gen_pool_process(void)
3030
{
3131
struct llist_node *head;
32-
struct mce_evt_llist *node;
32+
struct mce_evt_llist *node, *tmp;
3333
struct mce *mce;
3434

3535
head = llist_del_all(&mce_event_llist);
3636
if (!head)
3737
return;
3838

3939
head = llist_reverse_order(head);
40-
llist_for_each_entry(node, head, llnode) {
40+
llist_for_each_entry_safe(node, tmp, head, llnode) {
4141
mce = &node->mce;
4242
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
4343
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));

drivers/lguest/interrupts_and_traps.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
331331
* Actually now I think of it, it's possible that Ron *is* half the Plan 9
332332
* userbase. Oh well.
333333
*/
334-
static bool could_be_syscall(unsigned int num)
334+
bool could_be_syscall(unsigned int num)
335335
{
336336
/* Normal Linux IA32_SYSCALL_VECTOR or reserved vector? */
337337
return num == IA32_SYSCALL_VECTOR || num == syscall_vector;
@@ -416,6 +416,10 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
416416
*
417417
* This routine indicates if a particular trap number could be delivered
418418
* directly.
419+
*
420+
* Unfortunately, Linux 4.6 started using an interrupt gate instead of a
421+
* trap gate for syscalls, so this trick is ineffective. See Mastery for
422+
* how we could do this anyway...
419423
*/
420424
static bool direct_trap(unsigned int num)
421425
{

drivers/lguest/lg.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
167167
bool send_notify_to_eventfd(struct lg_cpu *cpu);
168168
void init_clockdev(struct lg_cpu *cpu);
169169
bool check_syscall_vector(struct lguest *lg);
170+
bool could_be_syscall(unsigned int num);
170171
int init_interrupts(void);
171172
void free_interrupts(void);
172173

drivers/lguest/x86/core.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,12 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
429429
return;
430430
break;
431431
case 32 ... 255:
432+
/* This might be a syscall. */
433+
if (could_be_syscall(cpu->regs->trapnum))
434+
break;
435+
432436
/*
433-
* These values mean a real interrupt occurred, in which case
437+
* Other values mean a real interrupt occurred, in which case
434438
* the Host handler has already been run. We just do a
435439
* friendly check if another process should now be run, then
436440
* return to run the Guest again.

0 commit comments

Comments
 (0)