Skip to content

Commit ce3dc44

Browse files
author
Martin Schwidefsky
committed
s390: add support for virtually mapped kernel stacks
With virtually mapped kernel stacks the kernel stack overflow detection is now fault based, every stack has a guard page in the vmalloc space. The panic_stack is renamed to nodat_stack and is used for all functions that need to run without DAT, e.g. memcpy_real or do_start_kdump. The main effect is a reduction in the kernel image size as with vmap stacks the old style overflow checking that adds two instructions per function is not needed anymore. Result from bloat-o-meter: add/remove: 20/1 grow/shrink: 13/26854 up/down: 2198/-216240 (-214042) In regard to performance the micro-benchmark for fork has a hit of a few microseconds, allocating 4 pages in vmalloc space is more expensive compared to an order-2 page allocation. But with real workloads I could not find a noticeable difference. Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
1 parent ff340d2 commit ce3dc44

File tree

16 files changed

+225
-88
lines changed

16 files changed

+225
-88
lines changed

arch/s390/Kconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ config S390
125125
select HAVE_ARCH_SOFT_DIRTY
126126
select HAVE_ARCH_TRACEHOOK
127127
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
128+
select HAVE_ARCH_VMAP_STACK
128129
select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
129130
select HAVE_CMPXCHG_DOUBLE
130131
select HAVE_CMPXCHG_LOCAL
@@ -649,6 +650,7 @@ config PACK_STACK
649650

650651
config CHECK_STACK
651652
def_bool y
653+
depends on !VMAP_STACK
652654
prompt "Detect kernel stack overflow"
653655
help
654656
This option enables the compiler option -mstack-guard and

arch/s390/include/asm/lowcore.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,9 @@ struct lowcore {
102102
__u64 current_task; /* 0x0338 */
103103
__u64 kernel_stack; /* 0x0340 */
104104

105-
/* Interrupt, panic and restart stack. */
105+
/* Interrupt, DAT-off and restart stack. */
106106
__u64 async_stack; /* 0x0348 */
107-
__u64 panic_stack; /* 0x0350 */
107+
__u64 nodat_stack; /* 0x0350 */
108108
__u64 restart_stack; /* 0x0358 */
109109

110110
/* Restart function and parameter. */

arch/s390/include/asm/processor.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,14 @@ struct thread_struct {
162162

163163
typedef struct thread_struct thread_struct;
164164

165+
/*
166+
* General size of a stack
167+
*/
168+
#define STACK_ORDER 2
169+
#define STACK_SIZE (PAGE_SIZE << STACK_ORDER)
170+
#define STACK_INIT_OFFSET \
171+
(STACK_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
172+
165173
/*
166174
* Stack layout of a C stack frame.
167175
*/

arch/s390/include/asm/thread_info.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,7 @@
1414
* Size of kernel stack for each process
1515
*/
1616
#define THREAD_SIZE_ORDER 2
17-
#define ASYNC_ORDER 2
18-
1917
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
20-
#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER)
2118

2219
#ifndef __ASSEMBLY__
2320
#include <asm/lowcore.h>

arch/s390/kernel/asm-offsets.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ int main(void)
159159
OFFSET(__LC_CURRENT, lowcore, current_task);
160160
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
161161
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
162-
OFFSET(__LC_PANIC_STACK, lowcore, panic_stack);
162+
OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
163163
OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
164164
OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
165165
OFFSET(__LC_RESTART_DATA, lowcore, restart_data);

arch/s390/kernel/base.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
ENTRY(s390_base_mcck_handler)
2020
basr %r13,0
21-
0: lg %r15,__LC_PANIC_STACK # load panic stack
21+
0: lg %r15,__LC_NODAT_STACK # load nodat stack
2222
aghi %r15,-STACK_FRAME_OVERHEAD
2323
larl %r1,s390_base_mcck_handler_fn
2424
lg %r9,0(%r1)

arch/s390/kernel/dumpstack.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,11 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
7777
frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
7878
#ifdef CONFIG_CHECK_STACK
7979
sp = __dump_trace(func, data, sp,
80-
S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
81-
S390_lowcore.panic_stack + frame_size);
80+
S390_lowcore.nodat_stack + frame_size - STACK_SIZE,
81+
S390_lowcore.nodat_stack + frame_size);
8282
#endif
8383
sp = __dump_trace(func, data, sp,
84-
S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
84+
S390_lowcore.async_stack + frame_size - STACK_SIZE,
8585
S390_lowcore.async_stack + frame_size);
8686
task = task ?: current;
8787
__dump_trace(func, data, sp,

arch/s390/kernel/entry.S

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,34 @@ _LPP_OFFSET = __LC_LPP
8585
#endif
8686
.endm
8787

88-
.macro CHECK_STACK stacksize,savearea
88+
.macro CHECK_STACK savearea
8989
#ifdef CONFIG_CHECK_STACK
90-
tml %r15,\stacksize - CONFIG_STACK_GUARD
90+
tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
9191
lghi %r14,\savearea
9292
jz stack_overflow
9393
#endif
9494
.endm
9595

96+
.macro CHECK_VMAP_STACK savearea,oklabel
97+
#ifdef CONFIG_VMAP_STACK
98+
lgr %r14,%r15
99+
nill %r14,0x10000 - STACK_SIZE
100+
oill %r14,STACK_INIT
101+
clg %r14,__LC_KERNEL_STACK
102+
je \oklabel
103+
clg %r14,__LC_ASYNC_STACK
104+
je \oklabel
105+
clg %r14,__LC_NODAT_STACK
106+
je \oklabel
107+
clg %r14,__LC_RESTART_STACK
108+
je \oklabel
109+
lghi %r14,\savearea
110+
j stack_overflow
111+
#else
112+
j \oklabel
113+
#endif
114+
.endm
115+
96116
.macro SWITCH_ASYNC savearea,timer
97117
tmhh %r8,0x0001 # interrupting from user ?
98118
jnz 1f
@@ -104,11 +124,11 @@ _LPP_OFFSET = __LC_LPP
104124
brasl %r14,cleanup_critical
105125
tmhh %r8,0x0001 # retest problem state after cleanup
106126
jnz 1f
107-
0: lg %r14,__LC_ASYNC_STACK # are we already on the async stack?
127+
0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
108128
slgr %r14,%r15
109129
srag %r14,%r14,STACK_SHIFT
110130
jnz 2f
111-
CHECK_STACK 1<<STACK_SHIFT,\savearea
131+
CHECK_STACK \savearea
112132
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
113133
j 3f
114134
1: UPDATE_VTIME %r14,%r15,\timer
@@ -600,9 +620,10 @@ ENTRY(pgm_check_handler)
600620
jnz 1f # -> enabled, can't be a double fault
601621
tm __LC_PGM_ILC+3,0x80 # check for per exception
602622
jnz .Lpgm_svcper # -> single stepped svc
603-
1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
623+
1: CHECK_STACK __LC_SAVE_AREA_SYNC
604624
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
605-
j 4f
625+
# CHECK_VMAP_STACK branches to stack_overflow or 4f
626+
CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
606627
2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
607628
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
608629
lg %r15,__LC_KERNEL_STACK
@@ -1136,7 +1157,8 @@ ENTRY(mcck_int_handler)
11361157
jnz 4f
11371158
TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
11381159
jno .Lmcck_panic
1139-
4: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
1160+
4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
1161+
SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
11401162
.Lmcck_skip:
11411163
lghi %r14,__LC_GPREGS_SAVE_AREA+64
11421164
stmg %r0,%r7,__PT_R0(%r11)
@@ -1163,7 +1185,6 @@ ENTRY(mcck_int_handler)
11631185
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
11641186
la %r11,STACK_FRAME_OVERHEAD(%r1)
11651187
lgr %r15,%r1
1166-
ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
11671188
TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
11681189
jno .Lmcck_return
11691190
TRACE_IRQS_OFF
@@ -1182,7 +1203,7 @@ ENTRY(mcck_int_handler)
11821203
lpswe __LC_RETURN_MCCK_PSW
11831204

11841205
.Lmcck_panic:
1185-
lg %r15,__LC_PANIC_STACK
1206+
lg %r15,__LC_NODAT_STACK
11861207
la %r11,STACK_FRAME_OVERHEAD(%r15)
11871208
j .Lmcck_skip
11881209

@@ -1193,12 +1214,10 @@ ENTRY(restart_int_handler)
11931214
ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
11941215
stg %r15,__LC_SAVE_AREA_RESTART
11951216
lg %r15,__LC_RESTART_STACK
1196-
aghi %r15,-__PT_SIZE # create pt_regs on stack
1197-
xc 0(__PT_SIZE,%r15),0(%r15)
1198-
stmg %r0,%r14,__PT_R0(%r15)
1199-
mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
1200-
mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
1201-
aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
1217+
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
1218+
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
1219+
mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
1220+
mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
12021221
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
12031222
lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
12041223
lg %r2,__LC_RESTART_DATA
@@ -1216,14 +1235,14 @@ ENTRY(restart_int_handler)
12161235

12171236
.section .kprobes.text, "ax"
12181237

1219-
#ifdef CONFIG_CHECK_STACK
1238+
#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
12201239
/*
12211240
* The synchronous or the asynchronous stack overflowed. We are dead.
12221241
* No need to properly save the registers, we are going to panic anyway.
12231242
* Setup a pt_regs so that show_trace can provide a good call trace.
12241243
*/
12251244
stack_overflow:
1226-
lg %r15,__LC_PANIC_STACK # change to panic stack
1245+
lg %r15,__LC_NODAT_STACK # change to nodat stack
12271246
la %r11,STACK_FRAME_OVERHEAD(%r15)
12281247
stmg %r0,%r7,__PT_R0(%r11)
12291248
stmg %r8,%r9,__PT_PSW(%r11)

arch/s390/kernel/entry.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,7 @@ DECLARE_PER_CPU(u64, mt_cycles[8]);
8686
void gs_load_bc_cb(struct pt_regs *regs);
8787
void set_fs_fixup(void);
8888

89+
unsigned long stack_alloc(void);
90+
void stack_free(unsigned long stack);
91+
8992
#endif /* _ENTRY_H */

arch/s390/kernel/head64.S

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ ENTRY(startup_continue)
3636
#
3737
larl %r14,init_task
3838
stg %r14,__LC_CURRENT
39-
larl %r15,init_thread_union+THREAD_SIZE
40-
stg %r15,__LC_KERNEL_STACK # set end of kernel stack
41-
aghi %r15,-STACK_FRAME_OVERHEAD
39+
larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
4240
#
4341
# Early setup functions that may not rely on an initialized bss section,
4442
# like moving the initrd. Returns with an initialized bss section.

arch/s390/kernel/irq.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ void do_softirq_own_stack(void)
171171
old = current_stack_pointer();
172172
/* Check against async. stack address range. */
173173
new = S390_lowcore.async_stack;
174-
if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) {
174+
if (((new - old) >> (PAGE_SHIFT + STACK_ORDER)) != 0) {
175175
CALL_ON_STACK(__do_softirq, new, 0);
176176
} else {
177177
/* We are already on the async stack. */

arch/s390/kernel/machine_kexec.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,18 +142,27 @@ static noinline void __machine_kdump(void *image)
142142
}
143143
#endif
144144

145+
static unsigned long do_start_kdump(unsigned long addr)
146+
{
147+
struct kimage *image = (struct kimage *) addr;
148+
int (*start_kdump)(int) = (void *)image->start;
149+
int rc;
150+
151+
__arch_local_irq_stnsm(0xfb); /* disable DAT */
152+
rc = start_kdump(0);
153+
__arch_local_irq_stosm(0x04); /* enable DAT */
154+
return rc;
155+
}
156+
145157
/*
146158
* Check if kdump checksums are valid: We call purgatory with parameter "0"
147159
*/
148160
static bool kdump_csum_valid(struct kimage *image)
149161
{
150162
#ifdef CONFIG_CRASH_DUMP
151-
int (*start_kdump)(int) = (void *)image->start;
152163
int rc;
153164

154-
__arch_local_irq_stnsm(0xfb); /* disable DAT */
155-
rc = start_kdump(0);
156-
__arch_local_irq_stosm(0x04); /* enable DAT */
165+
rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
157166
return rc == 0;
158167
#else
159168
return false;

arch/s390/kernel/setup.c

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include <linux/crash_dump.h>
5050
#include <linux/memory.h>
5151
#include <linux/compat.h>
52+
#include <linux/start_kernel.h>
5253

5354
#include <asm/ipl.h>
5455
#include <asm/facility.h>
@@ -303,6 +304,78 @@ early_param("vmalloc", parse_vmalloc);
303304

304305
void *restart_stack __section(.data);
305306

307+
unsigned long stack_alloc(void)
308+
{
309+
#ifdef CONFIG_VMAP_STACK
310+
return (unsigned long)
311+
__vmalloc_node_range(STACK_SIZE, STACK_SIZE,
312+
VMALLOC_START, VMALLOC_END,
313+
THREADINFO_GFP,
314+
PAGE_KERNEL, 0, NUMA_NO_NODE,
315+
__builtin_return_address(0));
316+
#else
317+
return __get_free_pages(GFP_KERNEL, STACK_ORDER);
318+
#endif
319+
}
320+
321+
void stack_free(unsigned long stack)
322+
{
323+
#ifdef CONFIG_VMAP_STACK
324+
vfree((void *) stack);
325+
#else
326+
free_pages(stack, STACK_ORDER);
327+
#endif
328+
}
329+
330+
int __init arch_early_irq_init(void)
331+
{
332+
unsigned long stack;
333+
334+
stack = __get_free_pages(GFP_KERNEL, STACK_ORDER);
335+
if (!stack)
336+
panic("Couldn't allocate async stack");
337+
S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
338+
return 0;
339+
}
340+
341+
static int __init async_stack_realloc(void)
342+
{
343+
unsigned long old, new;
344+
345+
old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
346+
new = stack_alloc();
347+
if (!new)
348+
panic("Couldn't allocate async stack");
349+
S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
350+
free_pages(old, STACK_ORDER);
351+
return 0;
352+
}
353+
early_initcall(async_stack_realloc);
354+
355+
void __init arch_call_rest_init(void)
356+
{
357+
struct stack_frame *frame;
358+
unsigned long stack;
359+
360+
stack = stack_alloc();
361+
if (!stack)
362+
panic("Couldn't allocate kernel stack");
363+
current->stack = (void *) stack;
364+
#ifdef CONFIG_VMAP_STACK
365+
current->stack_vm_area = (void *) stack;
366+
#endif
367+
set_task_stack_end_magic(current);
368+
stack += STACK_INIT_OFFSET;
369+
S390_lowcore.kernel_stack = stack;
370+
frame = (struct stack_frame *) stack;
371+
memset(frame, 0, sizeof(*frame));
372+
/* Branch to rest_init on the new stack, never returns */
373+
asm volatile(
374+
" la 15,0(%[_frame])\n"
375+
" jg rest_init\n"
376+
: : [_frame] "a" (frame));
377+
}
378+
306379
static void __init setup_lowcore(void)
307380
{
308381
struct lowcore *lc;
@@ -329,14 +402,8 @@ static void __init setup_lowcore(void)
329402
PSW_MASK_DAT | PSW_MASK_MCHECK;
330403
lc->io_new_psw.addr = (unsigned long) io_int_handler;
331404
lc->clock_comparator = clock_comparator_max;
332-
lc->kernel_stack = ((unsigned long) &init_thread_union)
405+
lc->nodat_stack = ((unsigned long) &init_thread_union)
333406
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
334-
lc->async_stack = (unsigned long)
335-
memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
336-
+ ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
337-
lc->panic_stack = (unsigned long)
338-
memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
339-
+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
340407
lc->current_task = (unsigned long)&init_task;
341408
lc->lpp = LPP_MAGIC;
342409
lc->machine_flags = S390_lowcore.machine_flags;
@@ -357,8 +424,12 @@ static void __init setup_lowcore(void)
357424
lc->last_update_timer = S390_lowcore.last_update_timer;
358425
lc->last_update_clock = S390_lowcore.last_update_clock;
359426

360-
restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE);
361-
restart_stack += ASYNC_SIZE;
427+
/*
428+
* Allocate the global restart stack which is the same for
429+
* all CPUs in case *one* of them does a PSW restart.
430+
*/
431+
restart_stack = memblock_virt_alloc(STACK_SIZE, STACK_SIZE);
432+
restart_stack += STACK_INIT_OFFSET;
362433

363434
/*
364435
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant

0 commit comments

Comments
 (0)