Skip to content

Commit e18a5eb

Browse files
committed
Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull watchdog fix from Thomas Gleixner: "A fix for the hardlockup watchdog to prevent false positives with extreme Turbo-Modes which make the perf/NMI watchdog fire faster than the hrtimer which is used to verify. This is slightly larger than the minimal fix, which would just increase the hrtimer frequency but would come with the extra overhead of more watchdog timer interrupts and thread wakeups for all users. With this change we restrict the overhead to the extreme Turbo-Mode systems" * 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: kernel/watchdog: Prevent false positives with turbo modes
2 parents 58d4e45 + 7edaeb6 commit e18a5eb

File tree

5 files changed

+76
-0
lines changed

5 files changed

+76
-0
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ config X86
100100
select GENERIC_STRNCPY_FROM_USER
101101
select GENERIC_STRNLEN_USER
102102
select GENERIC_TIME_VSYSCALL
103+
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
103104
select HAVE_ACPI_APEI if ACPI
104105
select HAVE_ACPI_APEI_NMI if ACPI
105106
select HAVE_ALIGNED_STRUCT_PAGE if SLUB

include/linux/nmi.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
168168
#define sysctl_softlockup_all_cpu_backtrace 0
169169
#define sysctl_hardlockup_all_cpu_backtrace 0
170170
#endif
171+
172+
#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
173+
defined(CONFIG_HARDLOCKUP_DETECTOR)
174+
void watchdog_update_hrtimer_threshold(u64 period);
175+
#else
176+
static inline void watchdog_update_hrtimer_threshold(u64 period) { }
177+
#endif
178+
171179
extern bool is_hardlockup(void);
172180
struct ctl_table;
173181
extern int proc_watchdog(struct ctl_table *, int ,

kernel/watchdog.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ static void set_sample_period(void)
240240
* hardlockup detector generates a warning
241241
*/
242242
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
243+
watchdog_update_hrtimer_threshold(sample_period);
243244
}
244245

245246
/* Commands for resetting the watchdog */

kernel/watchdog_hld.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
3737
}
3838
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
3939

40+
#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
41+
static DEFINE_PER_CPU(ktime_t, last_timestamp);
42+
static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
43+
static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
44+
45+
/*
 * Update the minimum time that must elapse between two NMI samples
 * before the NMI handler evaluates a sample.
 *
 * @period: the hrtimer sample period; set_sample_period() passes its
 *          freshly computed sample_period (derived from NSEC_PER_SEC).
 *
 * The stored threshold is twice @period.
 */
void watchdog_update_hrtimer_threshold(u64 period)
46+
{
47+
/*
48+
* The hrtimer runs with a period of (watchdog_threshold * 2) / 5
49+
*
50+
* So it runs effectively with 2.5 times the rate of the NMI
51+
* watchdog. That means the hrtimer should fire 2-3 times before
52+
* the NMI watchdog expires. The NMI watchdog on x86 is based on
53+
* unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
54+
* might run way faster than expected and the NMI fires in a
55+
* smaller period than the one deduced from the nominal CPU
56+
* frequency. Depending on the Turbo-Mode factor this might be fast
57+
* enough to get the NMI period smaller than the hrtimer watchdog
58+
* period and trigger false positives.
59+
*
60+
* The sample threshold is used to check in the NMI handler whether
61+
* the minimum time between two NMI samples has elapsed. That
62+
* prevents false positives.
63+
*
64+
* Set this to 4/5 of the actual watchdog threshold period so the
65+
* hrtimer is guaranteed to fire at least once within the real
66+
* watchdog threshold.
67+
*/
68+
watchdog_hrtimer_sample_threshold = period * 2;
69+
}
70+
71+
/*
 * Timestamp based filter for NMI watchdog samples on the current CPU.
 *
 * Returns false when the NMI arrived less than
 * watchdog_hrtimer_sample_threshold after the last accepted sample; the
 * caller then skips the hardlockup check. Only up to nine consecutive
 * early NMIs are rejected this way: the tenth one is accepted regardless
 * of the measured delta.
 *
 * Returns true when the sample is accepted. In that case the per-CPU
 * nmi_rearmed counter is reset to 0 and last_timestamp is set to the
 * current time.
 */
static bool watchdog_check_timestamp(void)
72+
{
73+
ktime_t delta, now = ktime_get_mono_fast_ns();
74+
75+
delta = now - __this_cpu_read(last_timestamp);
76+
if (delta < watchdog_hrtimer_sample_threshold) {
77+
/*
78+
* If ktime is jiffies based, a stalled timer would prevent
79+
* jiffies from being incremented and the filter would look
80+
* at a stale timestamp and never trigger.
81+
*/
82+
if (__this_cpu_inc_return(nmi_rearmed) < 10)
83+
return false;
84+
}
85+
__this_cpu_write(nmi_rearmed, 0);
86+
__this_cpu_write(last_timestamp, now);
87+
return true;
88+
}
89+
#else
90+
/*
 * Stub for builds without CONFIG_HARDLOCKUP_CHECK_TIMESTAMP: no timestamp
 * filtering is done and every NMI sample is accepted.
 */
static inline bool watchdog_check_timestamp(void)
91+
{
92+
return true;
93+
}
94+
#endif
95+
4096
static struct perf_event_attr wd_hw_attr = {
4197
.type = PERF_TYPE_HARDWARE,
4298
.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
61117
return;
62118
}
63119

120+
if (!watchdog_check_timestamp())
121+
return;
122+
64123
/* check for a hardlockup
65124
* This is done by making sure our timer interrupt
66125
* is incrementing. The timer interrupt should have

lib/Kconfig.debug

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF
797797
bool
798798
select SOFTLOCKUP_DETECTOR
799799

800+
#
801+
# Enables a timestamp based low pass filter to compensate for perf based
802+
# hard lockup detection which runs too fast due to turbo modes.
803+
#
804+
config HARDLOCKUP_CHECK_TIMESTAMP
805+
bool
806+
800807
#
801808
# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
802809
# lockup detector rather than the perf based detector.

0 commit comments

Comments
 (0)