Skip to content

Commit 01ea443

Browse files
committed
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
"This is bigger than usual - the reason is partly a pent-up stream of fixes after the merge window and partly accidental. The fixes are:
 - five patches to fix a boot failure on Andy Lutomirski's laptop
 - four SGI UV platform fixes
 - KASAN fix
 - warning fix
 - documentation update
 - swap entry definition fix
 - pkeys fix
 - irq stats fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic/x2apic, smp/hotplug: Don't use before alloc in x2apic_cluster_probe()
  x86/efi: Allocate a trampoline if needed in efi_free_boot_services()
  x86/boot: Rework reserve_real_mode() to allow multiple tries
  x86/boot: Defer setup_real_mode() to early_initcall time
  x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly
  x86/boot: Run reserve_bios_regions() after we initialize the memory map
  x86/irq: Do not substract irq_tlb_count from irq_call_count
  x86/mm: Fix swap entry comment and macro
  x86/mm/kaslr: Fix -Wformat-security warning
  x86/mm/pkeys: Fix compact mode by removing protection keys' XSAVE buffer manipulation
  x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables
  x86/platform/UV: Fix kernel panic running RHEL kdump kernel on UV systems
  x86/platform/UV: Fix problem with UV4 BIOS providing incorrect PXM values
  x86/platform/UV: Fix bug with iounmap() of the UV4 EFI System Table causing a crash
  x86/platform/UV: Fix problem with UV4 Socket IDs not being contiguous
  x86/entry: Clarify the RF saving/restoring situation with SYSCALL/SYSRET
  x86/mm: Disable preemption during CR3 read+write
  x86/mm/KASLR: Increase BRK pages for KASLR memory randomization
  x86/mm/KASLR: Fix physical memory calculation on KASLR memory randomization
  x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text
2 parents 3bc6d8c + d52c056 commit 01ea443

File tree

20 files changed

+182
-195
lines changed

20 files changed

+182
-195
lines changed

arch/x86/entry/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
66
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
77

8+
CFLAGS_syscall_64.o += -Wno-override-init
9+
CFLAGS_syscall_32.o += -Wno-override-init
810
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
911
obj-y += common.o
1012

arch/x86/entry/entry_64.S

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -288,11 +288,15 @@ return_from_SYSCALL_64:
288288
jne opportunistic_sysret_failed
289289

290290
/*
291-
* SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
292-
* restoring TF results in a trap from userspace immediately after
293-
* SYSRET. This would cause an infinite loop whenever #DB happens
294-
* with register state that satisfies the opportunistic SYSRET
295-
* conditions. For example, single-stepping this user code:
291+
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
292+
* restore RF properly. If the slowpath sets it for whatever reason, we
293+
* need to restore it correctly.
294+
*
295+
* SYSRET can restore TF, but unlike IRET, restoring TF results in a
296+
* trap from userspace immediately after SYSRET. This would cause an
297+
* infinite loop whenever #DB happens with register state that satisfies
298+
* the opportunistic SYSRET conditions. For example, single-stepping
299+
* this user code:
296300
*
297301
* movq $stuck_here, %rcx
298302
* pushfq
@@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
601605
.endm
602606
#endif
603607

608+
/* Make sure APIC interrupt handlers end up in the irqentry section: */
609+
#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
610+
# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
611+
# define POP_SECTION_IRQENTRY .popsection
612+
#else
613+
# define PUSH_SECTION_IRQENTRY
614+
# define POP_SECTION_IRQENTRY
615+
#endif
616+
604617
.macro apicinterrupt num sym do_sym
618+
PUSH_SECTION_IRQENTRY
605619
apicinterrupt3 \num \sym \do_sym
606620
trace_apicinterrupt \num \sym
621+
POP_SECTION_IRQENTRY
607622
.endm
608623

609624
#ifdef CONFIG_SMP

arch/x86/include/asm/hardirq.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ typedef struct {
2222
#ifdef CONFIG_SMP
2323
unsigned int irq_resched_count;
2424
unsigned int irq_call_count;
25-
/*
26-
* irq_tlb_count is double-counted in irq_call_count, so it must be
27-
* subtracted from irq_call_count when displaying irq_call_count
28-
*/
2925
unsigned int irq_tlb_count;
3026
#endif
3127
#ifdef CONFIG_X86_THERMAL_VECTOR

arch/x86/include/asm/pgtable_64.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
145145
*
146146
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
147147
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
148-
* | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
148+
* | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
149149
*
150150
* G (8) is aliased and used as a PROT_NONE indicator for
151151
* !present ptes. We need to start storing swap entries above
@@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
156156
#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
157157
#define SWP_TYPE_BITS 5
158158
/* Place the offset above the type: */
159-
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
159+
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
160160

161161
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
162162

arch/x86/include/asm/realmode.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
5858
extern unsigned char secondary_startup_64[];
5959
#endif
6060

61+
static inline size_t real_mode_size_needed(void)
62+
{
63+
if (real_mode_header)
64+
return 0; /* already allocated. */
65+
66+
return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
67+
}
68+
69+
void set_real_mode_mem(phys_addr_t mem, size_t size);
6170
void reserve_real_mode(void);
62-
void setup_real_mode(void);
6371

6472
#endif /* _ARCH_X86_REALMODE_H */

arch/x86/include/asm/tlbflush.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
135135

136136
static inline void __native_flush_tlb(void)
137137
{
138+
/*
139+
* If current->mm == NULL then we borrow a mm which may change during a
140+
* task switch and therefore we must not be preempted while we write CR3
141+
* back:
142+
*/
143+
preempt_disable();
138144
native_write_cr3(native_read_cr3());
145+
preempt_enable();
139146
}
140147

141148
static inline void __native_flush_tlb_global_irq_disabled(void)

arch/x86/include/asm/uv/bios.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ struct uv_gam_range_entry {
7979
u16 nasid; /* HNasid */
8080
u16 sockid; /* Socket ID, high bits of APIC ID */
8181
u16 pnode; /* Index to MMR and GRU spaces */
82-
u32 pxm; /* ACPI proximity domain number */
82+
u32 unused2;
8383
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
8484
};
8585

@@ -88,7 +88,8 @@ struct uv_gam_range_entry {
8888
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
8989
#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
9090
#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
91-
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2
91+
#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
92+
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3
9293

9394
#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
9495
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */

arch/x86/kernel/apic/x2apic_cluster.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
155155
/*
156156
* At CPU state changes, update the x2apic cluster sibling info.
157157
*/
158-
int x2apic_prepare_cpu(unsigned int cpu)
158+
static int x2apic_prepare_cpu(unsigned int cpu)
159159
{
160160
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
161161
return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
168168
return 0;
169169
}
170170

171-
int x2apic_dead_cpu(unsigned int this_cpu)
171+
static int x2apic_dead_cpu(unsigned int this_cpu)
172172
{
173173
int cpu;
174174

@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
186186
static int x2apic_cluster_probe(void)
187187
{
188188
int cpu = smp_processor_id();
189+
int ret;
189190

190191
if (!x2apic_mode)
191192
return 0;
192193

194+
ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
195+
x2apic_prepare_cpu, x2apic_dead_cpu);
196+
if (ret < 0) {
197+
pr_err("Failed to register X2APIC_PREPARE\n");
198+
return 0;
199+
}
193200
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
194-
cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
195-
x2apic_prepare_cpu, x2apic_dead_cpu);
196201
return 1;
197202
}
198203

arch/x86/kernel/apic/x2apic_uv_x.c

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
223223
if (strncmp(oem_id, "SGI", 3) != 0)
224224
return 0;
225225

226+
if (numa_off) {
227+
pr_err("UV: NUMA is off, disabling UV support\n");
228+
return 0;
229+
}
230+
226231
/* Setup early hub type field in uv_hub_info for Node 0 */
227232
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
228233

@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
325330
struct uv_gam_range_entry *gre = uv_gre_table;
326331
struct uv_gam_range_s *grt;
327332
unsigned long last_limit = 0, ram_limit = 0;
328-
int bytes, i, sid, lsid = -1;
333+
int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
329334

330335
if (!gre)
331336
return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
356361
}
357362
sid = gre->sockid - _min_socket;
358363
if (lsid < sid) { /* new range */
359-
grt = &_gr_table[sid];
360-
grt->base = lsid;
364+
grt = &_gr_table[indx];
365+
grt->base = lindx;
361366
grt->nasid = gre->nasid;
362367
grt->limit = last_limit = gre->limit;
363368
lsid = sid;
369+
lindx = indx++;
364370
continue;
365371
}
366372
if (lsid == sid && !ram_limit) { /* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
371377
}
372378
if (!ram_limit) { /* non-contiguous ram range */
373379
grt++;
374-
grt->base = sid - 1;
380+
grt->base = lindx;
375381
grt->nasid = gre->nasid;
376382
grt->limit = last_limit = gre->limit;
377383
continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
11551161
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
11561162
if (!index) {
11571163
pr_info("UV: GAM Range Table...\n");
1158-
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
1164+
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
11591165
"Range", "", "Size", "Type", "NASID",
1160-
"SID", "PN", "PXM");
1166+
"SID", "PN");
11611167
}
11621168
pr_info(
1163-
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
1169+
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
11641170
index++,
11651171
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
11661172
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
11671173
((unsigned long)(gre->limit - lgre)) >>
11681174
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
1169-
gre->type, gre->nasid, gre->sockid,
1170-
gre->pnode, gre->pxm);
1175+
gre->type, gre->nasid, gre->sockid, gre->pnode);
11711176

11721177
lgre = gre->limit;
11731178
if (sock_min > gre->sockid)
@@ -1286,28 +1291,26 @@ static void __init build_socket_tables(void)
12861291
_pnode_to_socket[i] = SOCK_EMPTY;
12871292

12881293
/* fill in pnode/node/addr conversion list values */
1289-
pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
1294+
pr_info("UV: GAM Building socket/pnode conversion tables\n");
12901295
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
12911296
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
12921297
continue;
12931298
i = gre->sockid - minsock;
12941299
if (_socket_to_pnode[i] != SOCK_EMPTY)
12951300
continue; /* duplicate */
12961301
_socket_to_pnode[i] = gre->pnode;
1297-
_socket_to_node[i] = gre->pxm;
12981302

12991303
i = gre->pnode - minpnode;
13001304
_pnode_to_socket[i] = gre->sockid;
13011305

13021306
pr_info(
1303-
"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
1307+
"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
13041308
gre->sockid, gre->type, gre->nasid,
13051309
_socket_to_pnode[gre->sockid - minsock],
1306-
_socket_to_node[gre->sockid - minsock],
13071310
_pnode_to_socket[gre->pnode - minpnode]);
13081311
}
13091312

1310-
/* check socket -> node values */
1313+
/* Set socket -> node values */
13111314
lnid = -1;
13121315
for_each_present_cpu(cpu) {
13131316
int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
13181321
lnid = nid;
13191322
apicid = per_cpu(x86_cpu_to_apicid, cpu);
13201323
sockid = apicid >> uv_cpuid.socketid_shift;
1321-
i = sockid - minsock;
1322-
1323-
if (nid != _socket_to_node[i]) {
1324-
pr_warn(
1325-
"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
1326-
i, sockid, gre->type, _socket_to_node[i], nid);
1327-
_socket_to_node[i] = nid;
1328-
}
1324+
_socket_to_node[sockid - minsock] = nid;
1325+
pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
1326+
sockid, apicid, nid);
13291327
}
13301328

13311329
/* Setup physical blade to pnode translation from GAM Range Table */

0 commit comments

Comments
 (0)