Skip to content

Commit 9cf5773

Browse files
Peter Zijlstra authored and Ingo Molnar committed
watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work
Oleg suggested to replace the "watchdog/%u" threads with cpu_stop_work. That removes one thread per CPU while at the same time fixes softlockup vs SCHED_DEADLINE. But more importantly, it does away with the single smpboot_update_cpumask_percpu_thread() user, which allows cleanups/shrinkage of the smpboot interface. Suggested-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 4520843 commit 9cf5773

File tree

4 files changed

+71
-77
lines changed

4 files changed

+71
-77
lines changed

include/linux/cpuhotplug.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ enum cpuhp_state {
164164
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
165165
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
166166
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
167+
CPUHP_AP_WATCHDOG_ONLINE,
167168
CPUHP_AP_WORKQUEUE_ONLINE,
168169
CPUHP_AP_RCUTREE_ONLINE,
169170
CPUHP_AP_ONLINE_DYN,

include/linux/nmi.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,15 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
3333
#define sysctl_hardlockup_all_cpu_backtrace 0
3434
#endif /* !CONFIG_SMP */
3535

36+
extern int lockup_detector_online_cpu(unsigned int cpu);
37+
extern int lockup_detector_offline_cpu(unsigned int cpu);
38+
3639
#else /* CONFIG_LOCKUP_DETECTOR */
3740
static inline void lockup_detector_init(void) { }
3841
static inline void lockup_detector_soft_poweroff(void) { }
3942
static inline void lockup_detector_cleanup(void) { }
43+
#define lockup_detector_online_cpu NULL
44+
#define lockup_detector_offline_cpu NULL
4045
#endif /* !CONFIG_LOCKUP_DETECTOR */
4146

4247
#ifdef CONFIG_SOFTLOCKUP_DETECTOR

kernel/cpu.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
13441344
.startup.single = perf_event_init_cpu,
13451345
.teardown.single = perf_event_exit_cpu,
13461346
},
1347+
[CPUHP_AP_WATCHDOG_ONLINE] = {
1348+
.name = "lockup_detector:online",
1349+
.startup.single = lockup_detector_online_cpu,
1350+
.teardown.single = lockup_detector_offline_cpu,
1351+
},
13471352
[CPUHP_AP_WORKQUEUE_ONLINE] = {
13481353
.name = "workqueue:online",
13491354
.startup.single = workqueue_online_cpu,

kernel/watchdog.c

Lines changed: 60 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,14 @@
1818
#include <linux/init.h>
1919
#include <linux/module.h>
2020
#include <linux/sysctl.h>
21-
#include <linux/smpboot.h>
22-
#include <linux/sched/rt.h>
23-
#include <uapi/linux/sched/types.h>
2421
#include <linux/tick.h>
25-
#include <linux/workqueue.h>
2622
#include <linux/sched/clock.h>
2723
#include <linux/sched/debug.h>
2824
#include <linux/sched/isolation.h>
25+
#include <linux/stop_machine.h>
2926

3027
#include <asm/irq_regs.h>
3128
#include <linux/kvm_para.h>
32-
#include <linux/kthread.h>
3329

3430
static DEFINE_MUTEX(watchdog_mutex);
3531

@@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void)
169165
unsigned int __read_mostly softlockup_panic =
170166
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
171167

172-
static bool softlockup_threads_initialized __read_mostly;
168+
static bool softlockup_initialized __read_mostly;
173169
static u64 __read_mostly sample_period;
174170

175171
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
176-
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
177172
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
178173
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
179174
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
@@ -335,6 +330,25 @@ static void watchdog_interrupt_count(void)
335330
__this_cpu_inc(hrtimer_interrupts);
336331
}
337332

333+
/*
334+
* The watchdog thread function - touches the timestamp.
335+
*
336+
* It only runs once every sample_period seconds (4 seconds by
337+
* default) to reset the softlockup timestamp. If this gets delayed
338+
* for more than 2*watchdog_thresh seconds then the debug-printout
339+
* triggers in watchdog_timer_fn().
340+
*/
341+
static int softlockup_fn(void *data)
342+
{
343+
__this_cpu_write(soft_lockup_hrtimer_cnt,
344+
__this_cpu_read(hrtimer_interrupts));
345+
__touch_watchdog();
346+
347+
return 0;
348+
}
349+
350+
static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
351+
338352
/* watchdog kicker functions */
339353
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
340354
{
@@ -350,7 +364,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
350364
watchdog_interrupt_count();
351365

352366
/* kick the softlockup detector */
353-
wake_up_process(__this_cpu_read(softlockup_watchdog));
367+
stop_one_cpu_nowait(smp_processor_id(),
368+
softlockup_fn, NULL,
369+
this_cpu_ptr(&softlockup_stop_work));
354370

355371
/* .. and repeat */
356372
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -448,17 +464,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
448464
return HRTIMER_RESTART;
449465
}
450466

451-
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
452-
{
453-
struct sched_param param = { .sched_priority = prio };
454-
455-
sched_setscheduler(current, policy, &param);
456-
}
457-
458467
static void watchdog_enable(unsigned int cpu)
459468
{
460469
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
461470

471+
WARN_ON_ONCE(cpu != smp_processor_id());
472+
462473
/*
463474
* Start the timer first to prevent the NMI watchdog triggering
464475
* before the timer has a chance to fire.
@@ -473,15 +484,14 @@ static void watchdog_enable(unsigned int cpu)
473484
/* Enable the perf event */
474485
if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
475486
watchdog_nmi_enable(cpu);
476-
477-
watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
478487
}
479488

480489
static void watchdog_disable(unsigned int cpu)
481490
{
482491
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
483492

484-
watchdog_set_prio(SCHED_NORMAL, 0);
493+
WARN_ON_ONCE(cpu != smp_processor_id());
494+
485495
/*
486496
* Disable the perf event first. That prevents that a large delay
487497
* between disabling the timer and disabling the perf event causes
@@ -491,77 +501,63 @@ static void watchdog_disable(unsigned int cpu)
491501
hrtimer_cancel(hrtimer);
492502
}
493503

494-
static void watchdog_cleanup(unsigned int cpu, bool online)
504+
static int softlockup_stop_fn(void *data)
495505
{
496-
watchdog_disable(cpu);
506+
watchdog_disable(smp_processor_id());
507+
return 0;
497508
}
498509

499-
static int watchdog_should_run(unsigned int cpu)
510+
static void softlockup_stop_all(void)
500511
{
501-
return __this_cpu_read(hrtimer_interrupts) !=
502-
__this_cpu_read(soft_lockup_hrtimer_cnt);
512+
int cpu;
513+
514+
if (!softlockup_initialized)
515+
return;
516+
517+
for_each_cpu(cpu, &watchdog_allowed_mask)
518+
smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
519+
520+
cpumask_clear(&watchdog_allowed_mask);
503521
}
504522

505-
/*
506-
* The watchdog thread function - touches the timestamp.
507-
*
508-
* It only runs once every sample_period seconds (4 seconds by
509-
* default) to reset the softlockup timestamp. If this gets delayed
510-
* for more than 2*watchdog_thresh seconds then the debug-printout
511-
* triggers in watchdog_timer_fn().
512-
*/
513-
static void watchdog(unsigned int cpu)
523+
static int softlockup_start_fn(void *data)
514524
{
515-
__this_cpu_write(soft_lockup_hrtimer_cnt,
516-
__this_cpu_read(hrtimer_interrupts));
517-
__touch_watchdog();
525+
watchdog_enable(smp_processor_id());
526+
return 0;
518527
}
519528

520-
static struct smp_hotplug_thread watchdog_threads = {
521-
.store = &softlockup_watchdog,
522-
.thread_should_run = watchdog_should_run,
523-
.thread_fn = watchdog,
524-
.thread_comm = "watchdog/%u",
525-
.setup = watchdog_enable,
526-
.cleanup = watchdog_cleanup,
527-
.park = watchdog_disable,
528-
.unpark = watchdog_enable,
529-
};
530-
531-
static void softlockup_update_smpboot_threads(void)
529+
static void softlockup_start_all(void)
532530
{
533-
lockdep_assert_held(&watchdog_mutex);
534-
535-
if (!softlockup_threads_initialized)
536-
return;
531+
int cpu;
537532

538-
smpboot_update_cpumask_percpu_thread(&watchdog_threads,
539-
&watchdog_allowed_mask);
533+
cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
534+
for_each_cpu(cpu, &watchdog_allowed_mask)
535+
smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
540536
}
541537

542-
/* Temporarily park all watchdog threads */
543-
static void softlockup_park_all_threads(void)
538+
int lockup_detector_online_cpu(unsigned int cpu)
544539
{
545-
cpumask_clear(&watchdog_allowed_mask);
546-
softlockup_update_smpboot_threads();
540+
watchdog_enable(cpu);
541+
return 0;
547542
}
548543

549-
/* Unpark enabled threads */
550-
static void softlockup_unpark_threads(void)
544+
int lockup_detector_offline_cpu(unsigned int cpu)
551545
{
552-
cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
553-
softlockup_update_smpboot_threads();
546+
watchdog_disable(cpu);
547+
return 0;
554548
}
555549

556550
static void lockup_detector_reconfigure(void)
557551
{
558552
cpus_read_lock();
559553
watchdog_nmi_stop();
560-
softlockup_park_all_threads();
554+
555+
softlockup_stop_all();
561556
set_sample_period();
562557
lockup_detector_update_enable();
563558
if (watchdog_enabled && watchdog_thresh)
564-
softlockup_unpark_threads();
559+
softlockup_start_all();
560+
565561
watchdog_nmi_start();
566562
cpus_read_unlock();
567563
/*
@@ -580,8 +576,6 @@ static void lockup_detector_reconfigure(void)
580576
*/
581577
static __init void lockup_detector_setup(void)
582578
{
583-
int ret;
584-
585579
/*
586580
* If sysctl is off and watchdog got disabled on the command line,
587581
* nothing to do here.
@@ -592,24 +586,13 @@ static __init void lockup_detector_setup(void)
592586
!(watchdog_enabled && watchdog_thresh))
593587
return;
594588

595-
ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
596-
&watchdog_allowed_mask);
597-
if (ret) {
598-
pr_err("Failed to initialize soft lockup detector threads\n");
599-
return;
600-
}
601-
602589
mutex_lock(&watchdog_mutex);
603-
softlockup_threads_initialized = true;
604590
lockup_detector_reconfigure();
591+
softlockup_initialized = true;
605592
mutex_unlock(&watchdog_mutex);
606593
}
607594

608595
#else /* CONFIG_SOFTLOCKUP_DETECTOR */
609-
static inline int watchdog_park_threads(void) { return 0; }
610-
static inline void watchdog_unpark_threads(void) { }
611-
static inline int watchdog_enable_all_cpus(void) { return 0; }
612-
static inline void watchdog_disable_all_cpus(void) { }
613596
static void lockup_detector_reconfigure(void)
614597
{
615598
cpus_read_lock();

0 commit comments

Comments
 (0)