Skip to content

Commit b6df7b6

Browse files
committed
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:

"A set of x86 fixes:

 - Cure the LDT remapping to user space on 5 level paging which ended up in the KASLR space
 - Remove LDT mapping before freeing the LDT pages
 - Make NFIT MCE handling more robust
 - Unbreak the VSMP build by removing the dependency on paravirt ops
 - Support broken PIT emulation on Microsoft hyperV
 - Don't trace vmware_sched_clock() to avoid tracer recursion
 - Remove -pipe from KBUILD CFLAGS which breaks clang and is also slower on GCC
 - Trivial coding style and typo fixes"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu/vmware: Do not trace vmware_sched_clock()
  x86/vsmp: Remove dependency on pv_irq_ops
  x86/ldt: Remove unused variable in map_ldt_struct()
  x86/ldt: Unmap PTEs for the slot before freeing LDT pages
  x86/mm: Move LDT remap out of KASLR region on 5-level paging
  acpi/nfit, x86/mce: Validate a MCE's address before using it
  acpi/nfit, x86/mce: Handle only uncorrectable machine checks
  x86/build: Remove -pipe from KBUILD_CFLAGS
  x86/hyper-v: Fix indentation in hv_do_fast_hypercall16()
  Documentation/x86: Fix typo in zero-page.txt
  x86/hyper-v: Enable PIT shutdown quirk
  clockevents/drivers/i8253: Add support for PIT shutdown quirk
2 parents 655c6b9 + 1503538 commit b6df7b6

File tree

17 files changed

+114
-138
lines changed

17 files changed

+114
-138
lines changed

Documentation/x86/x86_64/mm.txt

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,23 +34,24 @@ __________________|____________|__________________|_________|___________________
3434
____________________________________________________________|___________________________________________________________
3535
| | | |
3636
ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
37-
ffff880000000000 | -120 TB | ffffc7ffffffffff | 64 TB | direct mapping of all physical memory (page_offset_base)
38-
ffffc80000000000 | -56 TB | ffffc8ffffffffff | 1 TB | ... unused hole
37+
ffff880000000000 | -120 TB | ffff887fffffffff | 0.5 TB | LDT remap for PTI
38+
ffff888000000000 | -119.5 TB | ffffc87fffffffff | 64 TB | direct mapping of all physical memory (page_offset_base)
39+
ffffc88000000000 | -55.5 TB | ffffc8ffffffffff | 0.5 TB | ... unused hole
3940
ffffc90000000000 | -55 TB | ffffe8ffffffffff | 32 TB | vmalloc/ioremap space (vmalloc_base)
4041
ffffe90000000000 | -23 TB | ffffe9ffffffffff | 1 TB | ... unused hole
4142
ffffea0000000000 | -22 TB | ffffeaffffffffff | 1 TB | virtual memory map (vmemmap_base)
4243
ffffeb0000000000 | -21 TB | ffffebffffffffff | 1 TB | ... unused hole
4344
ffffec0000000000 | -20 TB | fffffbffffffffff | 16 TB | KASAN shadow memory
44-
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
45-
| | | | vaddr_end for KASLR
46-
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
47-
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | LDT remap for PTI
48-
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
4945
__________________|____________|__________________|_________|____________________________________________________________
5046
|
51-
| Identical layout to the 47-bit one from here on:
47+
| Identical layout to the 56-bit one from here on:
5248
____________________________________________________________|____________________________________________________________
5349
| | | |
50+
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
51+
| | | | vaddr_end for KASLR
52+
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
53+
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
54+
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
5455
ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
5556
ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
5657
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
@@ -83,31 +84,32 @@ Notes:
8384
__________________|____________|__________________|_________|___________________________________________________________
8485
| | | |
8586
0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
86-
| | | | virtual memory addresses up to the -128 TB
87+
| | | | virtual memory addresses up to the -64 PB
8788
| | | | starting offset of kernel mappings.
8889
__________________|____________|__________________|_________|___________________________________________________________
8990
|
9091
| Kernel-space virtual memory, shared between all processes:
9192
____________________________________________________________|___________________________________________________________
9293
| | | |
9394
ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor
94-
ff10000000000000 | -60 PB | ff8fffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base)
95-
ff90000000000000 | -28 PB | ff9fffffffffffff | 4 PB | LDT remap for PTI
95+
ff10000000000000 | -60 PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI
96+
ff11000000000000 | -59.75 PB | ff90ffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base)
97+
ff91000000000000 | -27.75 PB | ff9fffffffffffff | 3.75 PB | ... unused hole
9698
ffa0000000000000 | -24 PB | ffd1ffffffffffff | 12.5 PB | vmalloc/ioremap space (vmalloc_base)
9799
ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole
98100
ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base)
99101
ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole
100102
ffdf000000000000 | -8.25 PB | fffffbffffffffff | ~8 PB | KASAN shadow memory
101-
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
102-
| | | | vaddr_end for KASLR
103-
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
104-
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
105-
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
106103
__________________|____________|__________________|_________|____________________________________________________________
107104
|
108105
| Identical layout to the 47-bit one from here on:
109106
____________________________________________________________|____________________________________________________________
110107
| | | |
108+
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
109+
| | | | vaddr_end for KASLR
110+
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
111+
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
112+
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
111113
ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
112114
ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
113115
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole

Documentation/x86/zero-page.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Offset Proto Name Meaning
2525
0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits
2626
140/080 ALL edid_info Video mode setup (struct edid_info)
2727
1C0/020 ALL efi_info EFI 32 information (struct efi_info)
28-
1E0/004 ALL alk_mem_k Alternative mem check, in KB
28+
1E0/004 ALL alt_mem_k Alternative mem check, in KB
2929
1E4/004 ALL scratch Scratch field for the kernel setup code
3030
1E8/001 ALL e820_entries Number of entries in e820_table (below)
3131
1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below)

arch/x86/Kconfig

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,6 @@ config X86_VSMP
525525
bool "ScaleMP vSMP"
526526
select HYPERVISOR_GUEST
527527
select PARAVIRT
528-
select PARAVIRT_XXL
529528
depends on X86_64 && PCI
530529
depends on X86_EXTENDED_PLATFORM
531530
depends on SMP

arch/x86/Makefile

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,6 @@ ifdef CONFIG_X86_64
213213
KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
214214
endif
215215

216-
# Speed up the build
217-
KBUILD_CFLAGS += -pipe
218216
# Workaround for a gcc prerelease that unfortunately was shipped in a suse release
219217
KBUILD_CFLAGS += -Wno-sign-compare
220218
#
@@ -239,7 +237,7 @@ archheaders:
239237
archmacros:
240238
$(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
241239

242-
ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,-
240+
ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s
243241
export ASM_MACRO_FLAGS
244242
KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
245243

arch/x86/include/asm/mce.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am
221221

222222
int mce_available(struct cpuinfo_x86 *c);
223223
bool mce_is_memory_error(struct mce *m);
224+
bool mce_is_correctable(struct mce *m);
225+
int mce_usable_address(struct mce *m);
224226

225227
DECLARE_PER_CPU(unsigned, mce_exception_count);
226228
DECLARE_PER_CPU(unsigned, mce_poll_count);

arch/x86/include/asm/mshyperv.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
232232
: "cc");
233233
}
234234
#endif
235-
return hv_status;
235+
return hv_status;
236236
}
237237

238238
/*

arch/x86/include/asm/page_64_types.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,14 @@
3333

3434
/*
3535
* Set __PAGE_OFFSET to the most negative possible address +
36-
* PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
37-
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
38-
* what Xen requires.
36+
* PGDIR_SIZE*17 (pgd slot 273).
37+
*
38+
* The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for
39+
* a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary,
40+
* but it's what Xen requires.
3941
*/
40-
#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
41-
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)
42+
#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL)
43+
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL)
4244

4345
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
4446
#define __PAGE_OFFSET page_offset_base

arch/x86/include/asm/pgtable_64_types.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,7 @@ extern unsigned int ptrs_per_p4d;
111111
*/
112112
#define MAXMEM (1UL << MAX_PHYSMEM_BITS)
113113

114-
#define LDT_PGD_ENTRY_L4 -3UL
115-
#define LDT_PGD_ENTRY_L5 -112UL
116-
#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
114+
#define LDT_PGD_ENTRY -240UL
117115
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
118116
#define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE)
119117

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ static void mce_report_event(struct pt_regs *regs)
485485
* be somewhat complicated (e.g. segment offset would require an instruction
486486
* parser). So only support physical addresses up to page granularity for now.
487487
*/
488-
static int mce_usable_address(struct mce *m)
488+
int mce_usable_address(struct mce *m)
489489
{
490490
if (!(m->status & MCI_STATUS_ADDRV))
491491
return 0;
@@ -505,6 +505,7 @@ static int mce_usable_address(struct mce *m)
505505

506506
return 1;
507507
}
508+
EXPORT_SYMBOL_GPL(mce_usable_address);
508509

509510
bool mce_is_memory_error(struct mce *m)
510511
{
@@ -534,7 +535,7 @@ bool mce_is_memory_error(struct mce *m)
534535
}
535536
EXPORT_SYMBOL_GPL(mce_is_memory_error);
536537

537-
static bool mce_is_correctable(struct mce *m)
538+
bool mce_is_correctable(struct mce *m)
538539
{
539540
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
540541
return false;
@@ -547,6 +548,7 @@ static bool mce_is_correctable(struct mce *m)
547548

548549
return true;
549550
}
551+
EXPORT_SYMBOL_GPL(mce_is_correctable);
550552

551553
static bool cec_add_mce(struct mce *m)
552554
{

arch/x86/kernel/cpu/mshyperv.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <linux/interrupt.h>
2121
#include <linux/irq.h>
2222
#include <linux/kexec.h>
23+
#include <linux/i8253.h>
2324
#include <asm/processor.h>
2425
#include <asm/hypervisor.h>
2526
#include <asm/hyperv-tlfs.h>
@@ -295,6 +296,16 @@ static void __init ms_hyperv_init_platform(void)
295296
if (efi_enabled(EFI_BOOT))
296297
x86_platform.get_nmi_reason = hv_get_nmi_reason;
297298

299+
/*
300+
* Hyper-V VMs have a PIT emulation quirk such that zeroing the
301+
* counter register during PIT shutdown restarts the PIT. So it
302+
* continues to interrupt @18.2 HZ. Setting i8253_clear_counter
303+
* to false tells pit_shutdown() not to zero the counter so that
304+
* the PIT really is shut down. Generation 2 VMs don't have a PIT,
305+
* and setting this value has no effect.
306+
*/
307+
i8253_clear_counter_on_shutdown = false;
308+
298309
#if IS_ENABLED(CONFIG_HYPERV)
299310
/*
300311
* Setup the hook to get control post apic initialization.

arch/x86/kernel/cpu/vmware.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s)
7777
}
7878
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
7979

80-
static unsigned long long vmware_sched_clock(void)
80+
static unsigned long long notrace vmware_sched_clock(void)
8181
{
8282
unsigned long long ns;
8383

arch/x86/kernel/ldt.c

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -199,23 +199,14 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm)
199199
/*
200200
* If PTI is enabled, this maps the LDT into the kernelmode and
201201
* usermode tables for the given mm.
202-
*
203-
* There is no corresponding unmap function. Even if the LDT is freed, we
204-
* leave the PTEs around until the slot is reused or the mm is destroyed.
205-
* This is harmless: the LDT is always in ordinary memory, and no one will
206-
* access the freed slot.
207-
*
208-
* If we wanted to unmap freed LDTs, we'd also need to do a flush to make
209-
* it useful, and the flush would slow down modify_ldt().
210202
*/
211203
static int
212204
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
213205
{
214206
unsigned long va;
215207
bool is_vmalloc;
216208
spinlock_t *ptl;
217-
pgd_t *pgd;
218-
int i;
209+
int i, nr_pages;
219210

220211
if (!static_cpu_has(X86_FEATURE_PTI))
221212
return 0;
@@ -229,16 +220,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
229220
/* Check if the current mappings are sane */
230221
sanity_check_ldt_mapping(mm);
231222

232-
/*
233-
* Did we already have the top level entry allocated? We can't
234-
* use pgd_none() for this because it doesn't do anything on
235-
* 4-level page table kernels.
236-
*/
237-
pgd = pgd_offset(mm, LDT_BASE_ADDR);
238-
239223
is_vmalloc = is_vmalloc_addr(ldt->entries);
240224

241-
for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
225+
nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
226+
227+
for (i = 0; i < nr_pages; i++) {
242228
unsigned long offset = i << PAGE_SHIFT;
243229
const void *src = (char *)ldt->entries + offset;
244230
unsigned long pfn;
@@ -272,20 +258,50 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
272258
/* Propagate LDT mapping to the user page-table */
273259
map_ldt_struct_to_user(mm);
274260

275-
va = (unsigned long)ldt_slot_va(slot);
276-
flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false);
277-
278261
ldt->slot = slot;
279262
return 0;
280263
}
281264

265+
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
266+
{
267+
unsigned long va;
268+
int i, nr_pages;
269+
270+
if (!ldt)
271+
return;
272+
273+
/* LDT map/unmap is only required for PTI */
274+
if (!static_cpu_has(X86_FEATURE_PTI))
275+
return;
276+
277+
nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
278+
279+
for (i = 0; i < nr_pages; i++) {
280+
unsigned long offset = i << PAGE_SHIFT;
281+
spinlock_t *ptl;
282+
pte_t *ptep;
283+
284+
va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
285+
ptep = get_locked_pte(mm, va, &ptl);
286+
pte_clear(mm, va, ptep);
287+
pte_unmap_unlock(ptep, ptl);
288+
}
289+
290+
va = (unsigned long)ldt_slot_va(ldt->slot);
291+
flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
292+
}
293+
282294
#else /* !CONFIG_PAGE_TABLE_ISOLATION */
283295

284296
static int
285297
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
286298
{
287299
return 0;
288300
}
301+
302+
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
303+
{
304+
}
289305
#endif /* CONFIG_PAGE_TABLE_ISOLATION */
290306

291307
static void free_ldt_pgtables(struct mm_struct *mm)
@@ -524,6 +540,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
524540
}
525541

526542
install_ldt(mm, new_ldt);
543+
unmap_ldt_struct(mm, old_ldt);
527544
free_ldt_struct(old_ldt);
528545
error = 0;
529546

0 commit comments

Comments
 (0)