
Commit bcd951c

watchdog: Use hotplug thread infrastructure
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20120716103948.563736676@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
1 parent 3e339b5 commit bcd951c
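
For readers unfamiliar with the hotplug thread infrastructure this commit moves to: a subsystem fills in a struct smp_hotplug_thread and registers it once, and the smpboot core then creates, parks and unparks one kthread per CPU across hotplug events. Below is a minimal sketch of that registration pattern, using the same struct fields the diff relies on; the example_* identifiers are hypothetical and not part of this commit.

/* Illustrative sketch, not part of the commit. */
#include <linux/smpboot.h>
#include <linux/percpu.h>
#include <linux/init.h>

/* Per-CPU slot where the smpboot core stores each thread's task_struct. */
static DEFINE_PER_CPU(struct task_struct *, example_task);

/* Return nonzero when thread_fn has work to do; called with preemption disabled. */
static int example_should_run(unsigned int cpu)
{
	return 0;
}

/* Runs in the per-CPU kthread, already bound to @cpu. */
static void example_fn(unsigned int cpu)
{
}

static struct smp_hotplug_thread example_threads = {
	.store			= &example_task,
	.thread_should_run	= example_should_run,
	.thread_fn		= example_fn,
	.thread_comm		= "example/%u",
};

static int __init example_init(void)
{
	/* One call registers the thread for all online (and future) CPUs. */
	return smpboot_register_percpu_thread(&example_threads);
}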

File tree

1 file changed: +89 -174 lines changed

kernel/watchdog.c

Lines changed: 89 additions & 174 deletions
@@ -22,23 +22,26 @@
 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
+#include <linux/smpboot.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
 #include <linux/perf_event.h>
 
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+static int __read_mostly watchdog_disabled;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
@@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	__this_cpu_write(hard_watchdog_warn, false);
 	return;
 }
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
 static void watchdog_interrupt_count(void)
 {
 	__this_cpu_inc(hrtimer_interrupts);
 }
-#else
-static inline void watchdog_interrupt_count(void) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+static int watchdog_nmi_enable(unsigned int cpu);
+static void watchdog_nmi_disable(unsigned int cpu);
 
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
@@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
+static void watchdog_set_prio(unsigned int policy, unsigned int prio)
+{
+	struct sched_param param = { .sched_priority = prio };
 
-/*
- * The watchdog thread - touches the timestamp.
- */
-static int watchdog(void *unused)
+	sched_setscheduler(current, policy, &param);
+}
+
+static void watchdog_enable(unsigned int cpu)
 {
-	struct sched_param param = { .sched_priority = 0 };
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-	/* initialize timestamp */
-	__touch_watchdog();
+	if (!watchdog_enabled) {
+		kthread_park(current);
+		return;
+	}
+
+	/* Enable the perf event */
+	watchdog_nmi_enable(cpu);
 
 	/* kick off the timer for the hardlockup detector */
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer->function = watchdog_timer_fn;
+
 	/* done here because hrtimer_start can only pin to smp_processor_id() */
 	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
 		      HRTIMER_MODE_REL_PINNED);
 
-	set_current_state(TASK_INTERRUPTIBLE);
-	/*
-	 * Run briefly (kicked by the hrtimer callback function) once every
-	 * get_sample_period() seconds (4 seconds by default) to reset the
-	 * softlockup timestamp. If this gets delayed for more than
-	 * 2*watchdog_thresh seconds then the debug-printout triggers in
-	 * watchdog_timer_fn().
-	 */
-	while (!kthread_should_stop()) {
-		__touch_watchdog();
-		schedule();
+	/* initialize timestamp */
+	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
+	__touch_watchdog();
+}
 
-		if (kthread_should_stop())
-			break;
+static void watchdog_disable(unsigned int cpu)
+{
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	/*
-	 * Drop the policy/priority elevation during thread exit to avoid a
-	 * scheduling latency spike.
-	 */
-	__set_current_state(TASK_RUNNING);
-	sched_setscheduler(current, SCHED_NORMAL, &param);
-	return 0;
+	watchdog_set_prio(SCHED_NORMAL, 0);
+	hrtimer_cancel(hrtimer);
+	/* disable the perf event */
+	watchdog_nmi_disable(cpu);
 }
 
+static int watchdog_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(hrtimer_interrupts) !=
+		__this_cpu_read(soft_lockup_hrtimer_cnt);
+}
+
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every get_sample_period() seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static void watchdog(unsigned int cpu)
+{
+	__this_cpu_write(soft_lockup_hrtimer_cnt,
+			 __this_cpu_read(hrtimer_interrupts));
+	__touch_watchdog();
+}
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /*
@@ -379,7 +403,7 @@ static int watchdog(void *unused)
  */
 static unsigned long cpu0_err;
 
-static int watchdog_nmi_enable(int cpu)
+static int watchdog_nmi_enable(unsigned int cpu)
 {
 	struct perf_event_attr *wd_attr;
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
@@ -433,7 +457,7 @@ static int watchdog_nmi_enable(int cpu)
 	return 0;
 }
 
-static void watchdog_nmi_disable(int cpu)
+static void watchdog_nmi_disable(unsigned int cpu)
 {
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
 
@@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu)
 	return;
 }
 #else
-static int watchdog_nmi_enable(int cpu) { return 0; }
-static void watchdog_nmi_disable(int cpu) { return; }
+static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
+static void watchdog_nmi_disable(unsigned int cpu) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* prepare/enable/disable routines */
-static void watchdog_prepare_cpu(int cpu)
-{
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
-	WARN_ON(per_cpu(softlockup_watchdog, cpu));
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hrtimer->function = watchdog_timer_fn;
-}
-
-static int watchdog_enable(int cpu)
-{
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-	int err = 0;
-
-	/* enable the perf event */
-	err = watchdog_nmi_enable(cpu);
-
-	/* Regardless of err above, fall through and start softlockup */
-
-	/* create the watchdog thread */
-	if (!p) {
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-		p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
-		if (IS_ERR(p)) {
-			pr_err("softlockup watchdog for %i failed\n", cpu);
-			if (!err) {
-				/* if hardlockup hasn't already set this */
-				err = PTR_ERR(p);
-				/* and disable the perf event */
-				watchdog_nmi_disable(cpu);
-			}
-			goto out;
-		}
-		sched_setscheduler(p, SCHED_FIFO, &param);
-		kthread_bind(p, cpu);
-		per_cpu(watchdog_touch_ts, cpu) = 0;
-		per_cpu(softlockup_watchdog, cpu) = p;
-		wake_up_process(p);
-	}
-
-out:
-	return err;
-}
-
-static void watchdog_disable(int cpu)
-{
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
-	/*
-	 * cancel the timer first to stop incrementing the stats
-	 * and waking up the kthread
-	 */
-	hrtimer_cancel(hrtimer);
-
-	/* disable the perf event */
-	watchdog_nmi_disable(cpu);
-
-	/* stop the watchdog thread */
-	if (p) {
-		per_cpu(softlockup_watchdog, cpu) = NULL;
-		kthread_stop(p);
-	}
-}
-
 /* sysctl functions */
 #ifdef CONFIG_SYSCTL
 static void watchdog_enable_all_cpus(void)
 {
-	int cpu;
-
-	watchdog_enabled = 0;
-
-	for_each_online_cpu(cpu)
-		if (!watchdog_enable(cpu))
-			/* if any cpu succeeds, watchdog is considered
-			   enabled for the system */
-			watchdog_enabled = 1;
-
-	if (!watchdog_enabled)
-		pr_err("failed to be enabled on some cpus\n");
+	unsigned int cpu;
 
+	if (watchdog_disabled) {
+		watchdog_disabled = 0;
+		for_each_online_cpu(cpu)
+			kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+	}
 }
 
 static void watchdog_disable_all_cpus(void)
 {
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		watchdog_disable(cpu);
+	unsigned int cpu;
 
-	/* if all watchdogs are disabled, then they are disabled for the system */
-	watchdog_enabled = 0;
+	if (!watchdog_disabled) {
+		watchdog_disabled = 1;
+		for_each_online_cpu(cpu)
+			kthread_park(per_cpu(softlockup_watchdog, cpu));
+	}
 }
 
-
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
  */
@@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 {
 	int ret;
 
+	if (watchdog_disabled < 0)
+		return -ENODEV;
+
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (ret || !write)
-		goto out;
+		return ret;
 
 	if (watchdog_enabled && watchdog_thresh)
 		watchdog_enable_all_cpus();
 	else
 		watchdog_disable_all_cpus();
 
-out:
 	return ret;
 }
 #endif /* CONFIG_SYSCTL */
 
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int hotcpu = (unsigned long)hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		watchdog_prepare_cpu(hotcpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		if (watchdog_enabled)
-			watchdog_enable(hotcpu);
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-
-	/*
-	 * hardlockup and softlockup are not important enough
-	 * to block cpu bring up. Just always succeed and
-	 * rely on printk output to flag problems.
-	 */
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-	.notifier_call = cpu_callback
+static struct smp_hotplug_thread watchdog_threads = {
+	.store			= &softlockup_watchdog,
+	.thread_should_run	= watchdog_should_run,
+	.thread_fn		= watchdog,
+	.thread_comm		= "watchdog/%u",
+	.setup			= watchdog_enable,
+	.park			= watchdog_disable,
+	.unpark			= watchdog_enable,
 };
 
 void __init lockup_detector_init(void)
 {
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	WARN_ON(notifier_to_errno(err));
-
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	return;
+	if (smpboot_register_percpu_thread(&watchdog_threads)) {
+		pr_err("Failed to create watchdog threads, disabled\n");
+		watchdog_disabled = -ENODEV;
+	}
 }

Comments (0)