Skip to content

Commit d0a9964

Browse files
Mike Travis authored and Ingo Molnar committed
x86/platform/uv: Implement simple dump failover if kdump fails
The ability to trigger a kdump using the system NMI command was added by:

    commit 12ba6c9 ("x86/UV: Add kdump to UV NMI handler")
    Author: Mike Travis <travis@sgi.com>
    Date: Mon Sep 23 16:25:03 2013 -0500

This is useful because when kdump is working the information gathered is more informative than the original per CPU stack traces or "dump" option. However a number of things can go wrong with kdump and then the stack traces are more useful than nothing.

The two most common reasons for kdump to not be available are:

  1) if a problem occurs during boot before the kdump service is started, or
  2) the kdump daemon failed to start.

In either case the call to crash_kexec() returns unexpectedly.

When this happens uv_nmi_kdump() also sets the uv_nmi_kexec_failed flag which causes the slave CPU's to also return to the NMI handler. Upon this unexpected return to the NMI handler, the NMI handler will revert to the "dump" action which uses show_regs() to obtain a process trace dump for all the CPU's.

Other minor changes:

  - The "dump" action now generates both the show_regs() stack trace and show instruction pointer information. Whereas the "ips" action only shows instruction pointers for non-idle CPU's. This is more like an abbreviated "ps" display.

  - Change printk(KERN_DEFAULT...) --> pr_info()

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: George Beshers <gbeshers@sgi.com>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Hedi Berriche <hedi@sgi.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russ Anderson <rja@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 7c52198 commit d0a9964

File tree

1 file changed

+30
-23
lines changed

1 file changed

+30
-23
lines changed

arch/x86/platform/uv/uv_nmi.c

Lines changed: 30 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -376,38 +376,42 @@ static void uv_nmi_wait(int master)
376376
atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
377377
}
378378

379+
/* Dump Instruction Pointer header */
379380
static void uv_nmi_dump_cpu_ip_hdr(void)
380381
{
381-
printk(KERN_DEFAULT
382-
"\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n",
382+
pr_info("\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n",
383383
"CPU", "PID", "COMMAND", "IP");
384384
}
385385

386+
/* Dump Instruction Pointer info */
386387
static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
387388
{
388-
printk(KERN_DEFAULT "UV: %4d %6d %-32.32s ",
389-
cpu, current->pid, current->comm);
390-
389+
pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
391390
printk_address(regs->ip);
392391
}
393392

394-
/* Dump this cpu's state */
393+
/*
394+
* Dump this CPU's state. If action was set to "kdump" and the crash_kexec
395+
* failed, then we provide "dump" as an alternate action. Action "dump" now
396+
* also includes the show "ips" (instruction pointers) action whereas the
397+
* action "ips" only displays instruction pointers for the non-idle CPU's.
398+
* This is an abbreviated form of the "ps" command.
399+
*/
395400
static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
396401
{
397402
const char *dots = " ................................. ";
398403

399-
if (uv_nmi_action_is("ips")) {
400-
if (cpu == 0)
401-
uv_nmi_dump_cpu_ip_hdr();
404+
if (cpu == 0)
405+
uv_nmi_dump_cpu_ip_hdr();
402406

403-
if (current->pid != 0)
404-
uv_nmi_dump_cpu_ip(cpu, regs);
407+
if (current->pid != 0 || !uv_nmi_action_is("ips"))
408+
uv_nmi_dump_cpu_ip(cpu, regs);
405409

406-
} else if (uv_nmi_action_is("dump")) {
407-
printk(KERN_DEFAULT
408-
"UV:%sNMI process trace for CPU %d\n", dots, cpu);
410+
if (uv_nmi_action_is("dump")) {
411+
pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu);
409412
show_regs(regs);
410413
}
414+
411415
this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
412416
}
413417

@@ -469,8 +473,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
469473
uv_nmi_trigger_dump(tcpu);
470474
}
471475
if (ignored)
472-
printk(KERN_DEFAULT "UV: %d CPUs ignored NMI\n",
473-
ignored);
476+
pr_alert("UV: %d CPUs ignored NMI\n", ignored);
474477

475478
console_loglevel = saved_console_loglevel;
476479
pr_alert("UV: process trace complete\n");
@@ -492,8 +495,9 @@ static void uv_nmi_touch_watchdogs(void)
492495
touch_nmi_watchdog();
493496
}
494497

495-
#if defined(CONFIG_KEXEC_CORE)
496498
static atomic_t uv_nmi_kexec_failed;
499+
500+
#if defined(CONFIG_KEXEC_CORE)
497501
static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
498502
{
499503
/* Call crash to dump system state */
@@ -502,10 +506,9 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
502506
crash_kexec(regs);
503507

504508
pr_emerg("UV: crash_kexec unexpectedly returned, ");
509+
atomic_set(&uv_nmi_kexec_failed, 1);
505510
if (!kexec_crash_image) {
506511
pr_cont("crash kernel not loaded\n");
507-
atomic_set(&uv_nmi_kexec_failed, 1);
508-
uv_nmi_sync_exit(1);
509512
return;
510513
}
511514
pr_cont("kexec busy, stalling cpus while waiting\n");
@@ -514,16 +517,14 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
514517
/* If crash exec fails the slaves should return, otherwise stall */
515518
while (atomic_read(&uv_nmi_kexec_failed) == 0)
516519
mdelay(10);
517-
518-
/* Crash kernel most likely not loaded, return in an orderly fashion */
519-
uv_nmi_sync_exit(0);
520520
}
521521

522522
#else /* !CONFIG_KEXEC_CORE */
523523
static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
524524
{
525525
if (master)
526526
pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
527+
atomic_set(&uv_nmi_kexec_failed, 1);
527528
}
528529
#endif /* !CONFIG_KEXEC_CORE */
529530

@@ -613,9 +614,14 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
613614
master = (atomic_read(&uv_nmi_cpu) == cpu);
614615

615616
/* If NMI action is "kdump", then attempt to do it */
616-
if (uv_nmi_action_is("kdump"))
617+
if (uv_nmi_action_is("kdump")) {
617618
uv_nmi_kdump(cpu, master, regs);
618619

620+
/* Unexpected return, revert action to "dump" */
621+
if (master)
622+
strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
623+
}
624+
619625
/* Pause as all cpus enter the NMI handler */
620626
uv_nmi_wait(master);
621627

@@ -640,6 +646,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
640646
atomic_set(&uv_nmi_cpus_in_nmi, -1);
641647
atomic_set(&uv_nmi_cpu, -1);
642648
atomic_set(&uv_in_nmi, 0);
649+
atomic_set(&uv_nmi_kexec_failed, 0);
643650
}
644651

645652
uv_nmi_touch_watchdogs();

0 commit comments

Comments
 (0)