Skip to content

Commit 40b46a7

Browse files
author
H. Peter Anvin
committed
Merge remote-tracking branch 'rostedt/tip/perf/urgent-2' into x86-urgent-for-linus
2 parents bad1a75 + 5963e31 commit 40b46a7

File tree

6 files changed

+154
-16
lines changed

6 files changed

+154
-16
lines changed

arch/x86/include/asm/ftrace.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
-extern int modifying_ftrace_code;
+extern atomic_t modifying_ftrace_code;
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {

arch/x86/kernel/cpu/common.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr)
 		addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
 
+static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+
 void debug_stack_set_zero(void)
 {
+	this_cpu_inc(debug_stack_use_ctr);
 	load_idt((const struct desc_ptr *)&nmi_idt_descr);
 }
 
 void debug_stack_reset(void)
 {
-	load_idt((const struct desc_ptr *)&idt_descr);
+	if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+		return;
+	if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
+		load_idt((const struct desc_ptr *)&idt_descr);
 }
 
 #else /* CONFIG_X86_64 */

arch/x86/kernel/entry_64.S

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,44 @@ ENDPROC(native_usergs_sysret64)
 #endif
 .endm
 
+/*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+	call debug_stack_set_zero
+	TRACE_IRQS_OFF
+	call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+	call debug_stack_set_zero
+	TRACE_IRQS_ON
+	call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+	bt	$9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	jnc	1f
+	TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG		TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG		TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG		TRACE_IRQS_IRETQ
+#endif
+
 /*
  * C code is not supposed to know about undefined top of stack. Every time
  * a C function with an pt_regs argument is called from the SYSCALL based
@@ -1098,7 +1136,7 @@ ENTRY(\sym)
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
 	call save_paranoid
-	TRACE_IRQS_OFF
+	TRACE_IRQS_OFF_DEBUG
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
 	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
 ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
+	TRACE_IRQS_OFF_DEBUG
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz paranoid_restore
 	testl $3,CS(%rsp)
@@ -1404,7 +1442,7 @@ paranoid_swapgs:
 	RESTORE_ALL 8
 	jmp irq_return
 paranoid_restore:
-	TRACE_IRQS_IRETQ 0
+	TRACE_IRQS_IRETQ_DEBUG 0
 	RESTORE_ALL 8
 	jmp irq_return
 paranoid_userspace:

arch/x86/kernel/ftrace.c

Lines changed: 95 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void)
 }
 
 static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
 		   unsigned const char *new_code)
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod,
 	old = ftrace_call_replace(ip, addr);
 	new = ftrace_nop_replace();
 
-	return ftrace_modify_code(rec->ip, old, new);
+	/*
+	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
+	 * is converted to a nop, and will never become MCOUNT_ADDR
+	 * again. This code is either running before SMP (on boot up)
+	 * or before the code will ever be executed (module load).
+	 * We do not want to use the breakpoint version in this case,
+	 * just modify the code directly.
+	 */
+	if (addr == MCOUNT_ADDR)
+		return ftrace_modify_code_direct(rec->ip, old, new);
+
+	/* Normal cases use add_brk_on_nop */
+	WARN_ONCE(1, "invalid use of ftrace_make_nop");
+	return -EINVAL;
 }
 
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	old = ftrace_nop_replace();
 	new = ftrace_call_replace(ip, addr);
 
-	return ftrace_modify_code(rec->ip, old, new);
+	/* Should only be called when module is loaded */
+	return ftrace_modify_code_direct(rec->ip, old, new);
 }
 
+/*
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic_writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also considered
+ * that we can not read the modifying_ftrace_code before
+ * executing the breakpoint. That would be quite remarkable if
+ * it could do that. Here's the flow that is required:
+ *
+ *   CPU-0                          CPU-1
+ *
+ *   atomic_inc(mfc);
+ *   write int3s
+ *				<trap-int3> // implicit (r)mb
+ *				if (atomic_read(mfc))
+ *					call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ *   atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that a ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+		   unsigned const char *new_code);
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, (unsigned long)func);
+
+	/* See comment above by declaration of modifying_ftrace_code */
+	atomic_inc(&modifying_ftrace_code);
+
 	ret = ftrace_modify_code(ip, old, new);
 
+	atomic_dec(&modifying_ftrace_code);
+
 	return ret;
 }
 
-int modifying_ftrace_code __read_mostly;
-
 /*
  * A breakpoint was added to the code address we are about to
  * modify, and this is the handle that will just skip over it.
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable)
 	}
 }
 
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+		   unsigned const char *new_code)
+{
+	int ret;
+
+	ret = add_break(ip, old_code);
+	if (ret)
+		goto out;
+
+	run_sync();
+
+	ret = add_update_code(ip, new_code);
+	if (ret)
+		goto fail_update;
+
+	run_sync();
+
+	ret = ftrace_write(ip, new_code, 1);
+	if (ret) {
+		ret = -EPERM;
+		goto out;
+	}
+	run_sync();
+ out:
+	return ret;
+
+ fail_update:
+	probe_kernel_write((void *)ip, &old_code[0], 1);
+	goto out;
+}
+
 void arch_ftrace_update_code(int command)
 {
-	modifying_ftrace_code++;
+	/* See comment above by declaration of modifying_ftrace_code */
+	atomic_inc(&modifying_ftrace_code);
 
 	ftrace_modify_all_code(command);
 
-	modifying_ftrace_code--;
+	atomic_dec(&modifying_ftrace_code);
 }
 
 int __init ftrace_dyn_arch_init(void *data)

arch/x86/kernel/nmi.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
 	 */
 	if (unlikely(is_debug_stack(regs->sp))) {
 		debug_stack_set_zero();
-		__get_cpu_var(update_debug_stack) = 1;
+		this_cpu_write(update_debug_stack, 1);
 	}
 }
 
 static inline void nmi_nesting_postprocess(void)
 {
-	if (unlikely(__get_cpu_var(update_debug_stack)))
+	if (unlikely(this_cpu_read(update_debug_stack))) {
 		debug_stack_reset();
+		this_cpu_write(update_debug_stack, 0);
+	}
 }
 #endif
 
457459

arch/x86/kernel/traps.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
 dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
-	/* ftrace must be first, everything else may cause a recursive crash */
-	if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+	/*
+	 * ftrace must be first, everything else may cause a recursive crash.
+	 * See note by declaration of modifying_ftrace_code in ftrace.c
+	 */
+	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
+	    ftrace_int3_handler(regs))
 		return;
 #endif
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP

0 commit comments

Comments
 (0)