Skip to content

Commit 7edaeb6

Browse files
committed
kernel/watchdog: Prevent false positives with turbo modes
The hardlockup detector on x86 uses a performance counter based on unhalted CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the performance counter period, so the hrtimer should fire 2-3 times before the performance counter NMI fires. The NMI code checks whether the hrtimer fired since the last invocation. If not, it assumes a hard lockup. The calculation of those periods is based on the nominal CPU frequency. Turbo modes increase the CPU clock frequency and therefore shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x nominal frequency) the perf/NMI period is shorter than the hrtimer period, which leads to false positives. A simple fix would be to shorten the hrtimer period, but that comes with the side effect of more frequent hrtimer and softlockup thread wakeups, which is not desired. Implement a low pass filter, which checks the perf/NMI period against kernel time. If the perf/NMI fires before 4/5 of the watchdog period has elapsed then the event is ignored and postponed to the next perf/NMI. That solves the problem and avoids the overhead of shorter hrtimer periods and more frequent softlockup thread wakeups. Fixes: 58687ac ("lockup_detector: Combine nmi_watchdog and softlockup detector") Reported-and-tested-by: Kan Liang <Kan.liang@intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: dzickus@redhat.com Cc: prarit@redhat.com Cc: ak@linux.intel.com Cc: babu.moger@oracle.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: acme@redhat.com Cc: stable@vger.kernel.org Cc: atomlin@redhat.com Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
1 parent ef95484 commit 7edaeb6

File tree

5 files changed

+76
-0
lines changed

5 files changed

+76
-0
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ config X86
100100
select GENERIC_STRNCPY_FROM_USER
101101
select GENERIC_STRNLEN_USER
102102
select GENERIC_TIME_VSYSCALL
103+
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
103104
select HAVE_ACPI_APEI if ACPI
104105
select HAVE_ACPI_APEI_NMI if ACPI
105106
select HAVE_ALIGNED_STRUCT_PAGE if SLUB

include/linux/nmi.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
168168
#define sysctl_softlockup_all_cpu_backtrace 0
169169
#define sysctl_hardlockup_all_cpu_backtrace 0
170170
#endif
171+
172+
/*
* watchdog_update_hrtimer_threshold() is only a real function when both
* CONFIG_HARDLOCKUP_CHECK_TIMESTAMP and CONFIG_HARDLOCKUP_DETECTOR are
* enabled. In every other configuration it is an empty inline, so callers
* can invoke it unconditionally without their own #ifdef.
*/
#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
173+
defined(CONFIG_HARDLOCKUP_DETECTOR)
174+
void watchdog_update_hrtimer_threshold(u64 period);
175+
#else
176+
static inline void watchdog_update_hrtimer_threshold(u64 period) { }
177+
#endif
178+
171179
extern bool is_hardlockup(void);
172180
struct ctl_table;
173181
extern int proc_watchdog(struct ctl_table *, int ,

kernel/watchdog.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ static void set_sample_period(void)
240240
* hardlockup detector generates a warning
241241
*/
242242
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
243+
watchdog_update_hrtimer_threshold(sample_period);
243244
}
244245

245246
/* Commands for resetting the watchdog */

kernel/watchdog_hld.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
3737
}
3838
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
3939

40+
#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
41+
static DEFINE_PER_CPU(ktime_t, last_timestamp);
42+
static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
43+
static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
44+
45+
/*
* watchdog_update_hrtimer_threshold - set the minimum spacing between two
* NMI samples that watchdog_check_timestamp() will accept.
* @period: the hrtimer sample period. set_sample_period() passes its
*          freshly computed sample_period, which is derived from
*          NSEC_PER_SEC, i.e. a nanosecond value.
*
* The stored threshold is twice @period. With the hrtimer period being
* 2/5 of the watchdog threshold (see below), that equals 4/5 of the
* watchdog threshold.
*/
void watchdog_update_hrtimer_threshold(u64 period)
46+
{
47+
/*
48+
* The hrtimer runs with a period of (watchdog_threshold * 2) / 5
49+
*
50+
* So it runs effectively with 2.5 times the rate of the NMI
51+
* watchdog. That means the hrtimer should fire 2-3 times before
52+
* the NMI watchdog expires. The NMI watchdog on x86 is based on
53+
* unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
54+
* might run way faster than expected and the NMI fires in a
55+
* smaller period than the one deduced from the nominal CPU
56+
* frequency. Depending on the Turbo-Mode factor this might be fast
57+
* enough to get the NMI period smaller than the hrtimer watchdog
58+
* period and trigger false positives.
59+
*
60+
* The sample threshold is used to check in the NMI handler whether
61+
* the minimum time between two NMI samples has elapsed. That
62+
* prevents false positives.
63+
*
64+
* Set this to 4/5 of the actual watchdog threshold period so the
65+
* hrtimer is guaranteed to fire at least once within the real
66+
* watchdog threshold.
67+
*/
68+
watchdog_hrtimer_sample_threshold = period * 2;
69+
}
70+
71+
/*
* watchdog_check_timestamp - low pass filter for the perf/NMI watchdog.
*
* Compares the current ktime_get_mono_fast_ns() reading with this CPU's
* last accepted sample time (last_timestamp).
*
* Returns false when the NMI arrived sooner than
* watchdog_hrtimer_sample_threshold after the last accepted sample; the
* overflow callback then returns without running the hardlockup check.
* Returns true when enough time has elapsed, or when the limit of
* consecutive early NMIs has been reached. In that case the per-CPU
* counter is reset and the current time becomes the new reference.
*/
static bool watchdog_check_timestamp(void)
72+
{
73+
ktime_t delta, now = ktime_get_mono_fast_ns();
74+
75+
delta = now - __this_cpu_read(last_timestamp);
76+
if (delta < watchdog_hrtimer_sample_threshold) {
77+
/*
78+
* If ktime is jiffies based, a stalled timer would prevent
79+
* jiffies from being incremented and the filter would look
80+
* at a stale timestamp and never trigger.
81+
*/
82+
/*
* Safety valve for the case above: only the first nine early
* NMIs in a row are ignored. The tenth one brings the counter
* to 10 and falls through to be treated as a valid sample.
*/
if (__this_cpu_inc_return(nmi_rearmed) < 10)
83+
return false;
84+
}
85+
/* Sample accepted: restart the early-NMI count and the time window. */
__this_cpu_write(nmi_rearmed, 0);
86+
__this_cpu_write(last_timestamp, now);
87+
return true;
88+
}
89+
#else
90+
/*
* CONFIG_HARDLOCKUP_CHECK_TIMESTAMP is not set: no timestamp filtering,
* every NMI sample is accepted.
*/
static inline bool watchdog_check_timestamp(void)
91+
{
92+
return true;
93+
}
94+
#endif
95+
4096
static struct perf_event_attr wd_hw_attr = {
4197
.type = PERF_TYPE_HARDWARE,
4298
.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
61117
return;
62118
}
63119

120+
if (!watchdog_check_timestamp())
121+
return;
122+
64123
/* check for a hardlockup
65124
* This is done by making sure our timer interrupt
66125
* is incrementing. The timer interrupt should have

lib/Kconfig.debug

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF
797797
bool
798798
select SOFTLOCKUP_DETECTOR
799799

800+
#
801+
# Enables a timestamp based low pass filter to compensate for perf based
802+
# hard lockup detection which runs too fast due to turbo modes.
803+
#
804+
config HARDLOCKUP_CHECK_TIMESTAMP
805+
bool
806+
800807
#
801808
# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
802809
# lockup detector rather than the perf based detector.

0 commit comments

Comments
 (0)