 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
+#include <linux/smpboot.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
 #include <linux/perf_event.h>
 
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+static int __read_mostly watchdog_disabled;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
@@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	__this_cpu_write(hard_watchdog_warn, false);
 	return;
 }
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
 static void watchdog_interrupt_count(void)
 {
 	__this_cpu_inc(hrtimer_interrupts);
 }
-#else
-static inline void watchdog_interrupt_count(void) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+static int watchdog_nmi_enable(unsigned int cpu);
+static void watchdog_nmi_disable(unsigned int cpu);
 
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
@@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
+static void watchdog_set_prio(unsigned int policy, unsigned int prio)
+{
+	struct sched_param param = { .sched_priority = prio };
 
-/*
- * The watchdog thread - touches the timestamp.
- */
-static int watchdog(void *unused)
+	sched_setscheduler(current, policy, &param);
+}
+
+static void watchdog_enable(unsigned int cpu)
 {
-	struct sched_param param = { .sched_priority = 0 };
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-	/* initialize timestamp */
-	__touch_watchdog();
+	if (!watchdog_enabled) {
+		kthread_park(current);
+		return;
+	}
+
+	/* Enable the perf event */
+	watchdog_nmi_enable(cpu);
 
 	/* kick off the timer for the hardlockup detector */
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer->function = watchdog_timer_fn;
+
 	/* done here because hrtimer_start can only pin to smp_processor_id() */
 	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
 		      HRTIMER_MODE_REL_PINNED);
 
-	set_current_state(TASK_INTERRUPTIBLE);
-	/*
-	 * Run briefly (kicked by the hrtimer callback function) once every
-	 * get_sample_period() seconds (4 seconds by default) to reset the
-	 * softlockup timestamp. If this gets delayed for more than
-	 * 2*watchdog_thresh seconds then the debug-printout triggers in
-	 * watchdog_timer_fn().
-	 */
-	while (!kthread_should_stop()) {
-		__touch_watchdog();
-		schedule();
+	/* initialize timestamp */
+	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
+	__touch_watchdog();
+}
 
-		if (kthread_should_stop())
-			break;
+static void watchdog_disable(unsigned int cpu)
+{
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	/*
-	 * Drop the policy/priority elevation during thread exit to avoid a
-	 * scheduling latency spike.
-	 */
-	__set_current_state(TASK_RUNNING);
-	sched_setscheduler(current, SCHED_NORMAL, &param);
-	return 0;
+	watchdog_set_prio(SCHED_NORMAL, 0);
+	hrtimer_cancel(hrtimer);
+	/* disable the perf event */
+	watchdog_nmi_disable(cpu);
 }
 
+static int watchdog_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(hrtimer_interrupts) !=
+		__this_cpu_read(soft_lockup_hrtimer_cnt);
+}
+
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every get_sample_period() seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static void watchdog(unsigned int cpu)
+{
+	__this_cpu_write(soft_lockup_hrtimer_cnt,
+			 __this_cpu_read(hrtimer_interrupts));
+	__touch_watchdog();
+}
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /*
@@ -379,7 +403,7 @@ static int watchdog(void *unused)
  */
 static unsigned long cpu0_err;
 
-static int watchdog_nmi_enable(int cpu)
+static int watchdog_nmi_enable(unsigned int cpu)
 {
 	struct perf_event_attr *wd_attr;
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
@@ -433,7 +457,7 @@ static int watchdog_nmi_enable(int cpu)
 	return 0;
 }
 
-static void watchdog_nmi_disable(int cpu)
+static void watchdog_nmi_disable(unsigned int cpu)
 {
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
 
@@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu)
 	return;
 }
 #else
-static int watchdog_nmi_enable(int cpu) { return 0; }
-static void watchdog_nmi_disable(int cpu) { return; }
+static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
+static void watchdog_nmi_disable(unsigned int cpu) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* prepare/enable/disable routines */
-static void watchdog_prepare_cpu(int cpu)
-{
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
-	WARN_ON(per_cpu(softlockup_watchdog, cpu));
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hrtimer->function = watchdog_timer_fn;
-}
-
-static int watchdog_enable(int cpu)
-{
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-	int err = 0;
-
-	/* enable the perf event */
-	err = watchdog_nmi_enable(cpu);
-
-	/* Regardless of err above, fall through and start softlockup */
-
-	/* create the watchdog thread */
-	if (!p) {
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
-		p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
-		if (IS_ERR(p)) {
-			pr_err("softlockup watchdog for %i failed\n", cpu);
-			if (!err) {
-				/* if hardlockup hasn't already set this */
-				err = PTR_ERR(p);
-				/* and disable the perf event */
-				watchdog_nmi_disable(cpu);
-			}
-			goto out;
-		}
-		sched_setscheduler(p, SCHED_FIFO, &param);
-		kthread_bind(p, cpu);
-		per_cpu(watchdog_touch_ts, cpu) = 0;
-		per_cpu(softlockup_watchdog, cpu) = p;
-		wake_up_process(p);
-	}
-
-out:
-	return err;
-}
-
-static void watchdog_disable(int cpu)
-{
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
-	/*
-	 * cancel the timer first to stop incrementing the stats
-	 * and waking up the kthread
-	 */
-	hrtimer_cancel(hrtimer);
-
-	/* disable the perf event */
-	watchdog_nmi_disable(cpu);
-
-	/* stop the watchdog thread */
-	if (p) {
-		per_cpu(softlockup_watchdog, cpu) = NULL;
-		kthread_stop(p);
-	}
-}
-
 /* sysctl functions */
 #ifdef CONFIG_SYSCTL
 static void watchdog_enable_all_cpus(void)
 {
-	int cpu;
-
-	watchdog_enabled = 0;
-
-	for_each_online_cpu(cpu)
-		if (!watchdog_enable(cpu))
-			/* if any cpu succeeds, watchdog is considered
-			   enabled for the system */
-			watchdog_enabled = 1;
-
-	if (!watchdog_enabled)
-		pr_err("failed to be enabled on some cpus\n");
+	unsigned int cpu;
 
+	if (watchdog_disabled) {
+		watchdog_disabled = 0;
+		for_each_online_cpu(cpu)
+			kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+	}
 }
 
 static void watchdog_disable_all_cpus(void)
 {
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		watchdog_disable(cpu);
+	unsigned int cpu;
 
-	/* if all watchdogs are disabled, then they are disabled for the system */
-	watchdog_enabled = 0;
+	if (!watchdog_disabled) {
+		watchdog_disabled = 1;
+		for_each_online_cpu(cpu)
+			kthread_park(per_cpu(softlockup_watchdog, cpu));
+	}
 }
 
-
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
  */
@@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 {
 	int ret;
 
+	if (watchdog_disabled < 0)
+		return -ENODEV;
+
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (ret || !write)
-		goto out;
+		return ret;
 
 	if (watchdog_enabled && watchdog_thresh)
 		watchdog_enable_all_cpus();
 	else
 		watchdog_disable_all_cpus();
 
-out:
 	return ret;
 }
 #endif /* CONFIG_SYSCTL */
 
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int hotcpu = (unsigned long)hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		watchdog_prepare_cpu(hotcpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		if (watchdog_enabled)
-			watchdog_enable(hotcpu);
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-
-	/*
-	 * hardlockup and softlockup are not important enough
-	 * to block cpu bring up. Just always succeed and
-	 * rely on printk output to flag problems.
-	 */
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-	.notifier_call = cpu_callback
+static struct smp_hotplug_thread watchdog_threads = {
+	.store			= &softlockup_watchdog,
+	.thread_should_run	= watchdog_should_run,
+	.thread_fn		= watchdog,
+	.thread_comm		= "watchdog/%u",
+	.setup			= watchdog_enable,
+	.park			= watchdog_disable,
+	.unpark			= watchdog_enable,
 };
 
 void __init lockup_detector_init(void)
 {
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	WARN_ON(notifier_to_errno(err));
-
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	return;
+	if (smpboot_register_percpu_thread(&watchdog_threads)) {
+		pr_err("Failed to create watchdog threads, disabled\n");
+		watchdog_disabled = -ENODEV;
+	}
 }
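
The conversion above leans on the generic per-CPU hotplug thread infrastructure from <linux/smpboot.h>: the core keeps one kthread per CPU, calls .setup when a thread first runs, .park/.unpark when the thread is parked or unparked (as watchdog_disable_all_cpus()/watchdog_enable_all_cpus() now do explicitly), and only invokes .thread_fn(cpu) when .thread_should_run(cpu) returns non-zero. The following is a rough, hypothetical sketch of that registration pattern outside the watchdog; the demo_* names are invented for illustration, and only the smpboot API itself is taken from the patch:

#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smpboot.h>

static DEFINE_PER_CPU(struct task_struct *, demo_task);
static DEFINE_PER_CPU(unsigned long, demo_pending);

/* Polled by the smpboot loop; non-zero means "run demo_fn() now". */
static int demo_should_run(unsigned int cpu)
{
	return __this_cpu_read(demo_pending) != 0;
}

/* Runs in the CPU-bound kthread each time demo_should_run() fires. */
static void demo_fn(unsigned int cpu)
{
	__this_cpu_write(demo_pending, 0);
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_task,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_fn,
	.thread_comm		= "demo/%u",
};

/* A single registration replaces the old open-coded CPU notifier. */
static int __init demo_init(void)
{
	return smpboot_register_percpu_thread(&demo_threads);
}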