Skip to content

Commit e23604e

Browse files
committed
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull NOHZ updates from Ingo Molnar: "NOHZ enhancements, by Frederic Weisbecker, which reorganizes/refactors the NOHZ 'can the tick be stopped?' infrastructure and related code to be data driven, and harmonizes the naming and handling of all the various properties" [ This makes the ugly "fetch_or()" macro that the scheduler used internally a new generic helper, and does a bad job at it. I'm pulling it, but I've asked Ingo and Frederic to get this fixed up ] * 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched-clock: Migrate to use new tick dependency mask model posix-cpu-timers: Migrate to use new tick dependency mask model sched: Migrate sched to use new tick dependency mask model sched: Account rr tasks perf: Migrate perf to use new tick dependency mask model nohz: Use enum code for tick stop failure tracing message nohz: New tick dependency mask nohz: Implement wide kick on top of irq work atomic: Export fetch_or()
2 parents d4e7961 + 1f25184 commit e23604e

File tree

14 files changed

+424
-161
lines changed

14 files changed

+424
-161
lines changed

include/linux/atomic.h

Lines changed: 21 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
548548
}
549549
#endif
550550

551+
/**
552+
* fetch_or - perform *ptr |= mask and return old value of *ptr
553+
* @ptr: pointer to value
554+
* @mask: mask to OR on the value
555+
*
556+
* cmpxchg based fetch_or, macro so it works for different integer types
557+
*/
558+
#ifndef fetch_or
559+
#define fetch_or(ptr, mask) \
560+
({ typeof(*(ptr)) __old, __val = *(ptr); \
561+
for (;;) { \
562+
__old = cmpxchg((ptr), __val, __val | (mask)); \
563+
if (__old == __val) \
564+
break; \
565+
__val = __old; \
566+
} \
567+
__old; \
568+
})
569+
#endif
570+
571+
551572
#ifdef CONFIG_GENERIC_ATOMIC64
552573
#include <asm-generic/atomic64.h>
553574
#endif

include/linux/perf_event.h

Lines changed: 0 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -1110,12 +1110,6 @@ static inline void perf_event_task_tick(void) { }
11101110
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
11111111
#endif
11121112

1113-
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
1114-
extern bool perf_event_can_stop_tick(void);
1115-
#else
1116-
static inline bool perf_event_can_stop_tick(void) { return true; }
1117-
#endif
1118-
11191113
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
11201114
extern void perf_restore_debug_store(void);
11211115
#else

include/linux/posix-timers.h

Lines changed: 0 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
128128
void run_posix_cpu_timers(struct task_struct *task);
129129
void posix_cpu_timers_exit(struct task_struct *task);
130130
void posix_cpu_timers_exit_group(struct task_struct *task);
131-
132-
bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
133-
134131
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
135132
cputime_t *newval, cputime_t *oldval);
136133

include/linux/sched.h

Lines changed: 8 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -717,6 +717,10 @@ struct signal_struct {
717717
/* Earliest-expiration cache. */
718718
struct task_cputime cputime_expires;
719719

720+
#ifdef CONFIG_NO_HZ_FULL
721+
unsigned long tick_dep_mask;
722+
#endif
723+
720724
struct list_head cpu_timers[3];
721725

722726
struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
15421546
VTIME_SYS,
15431547
} vtime_snap_whence;
15441548
#endif
1549+
1550+
#ifdef CONFIG_NO_HZ_FULL
1551+
unsigned long tick_dep_mask;
1552+
#endif
15451553
unsigned long nvcsw, nivcsw; /* context switch counts */
15461554
u64 start_time; /* monotonic time in nsec */
15471555
u64 real_start_time; /* boot based time in nsec */
@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
23562364
#endif
23572365

23582366
#ifdef CONFIG_NO_HZ_FULL
2359-
extern bool sched_can_stop_tick(void);
23602367
extern u64 scheduler_tick_max_deferment(void);
2361-
#else
2362-
static inline bool sched_can_stop_tick(void) { return false; }
23632368
#endif
23642369

23652370
#ifdef CONFIG_SCHED_AUTOGROUP

include/linux/tick.h

Lines changed: 93 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
9797
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
9898
}
9999

100+
enum tick_dep_bits {
101+
TICK_DEP_BIT_POSIX_TIMER = 0,
102+
TICK_DEP_BIT_PERF_EVENTS = 1,
103+
TICK_DEP_BIT_SCHED = 2,
104+
TICK_DEP_BIT_CLOCK_UNSTABLE = 3
105+
};
106+
107+
#define TICK_DEP_MASK_NONE 0
108+
#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
109+
#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
110+
#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
111+
#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
112+
100113
#ifdef CONFIG_NO_HZ_COMMON
101114
extern int tick_nohz_enabled;
102115
extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
154167
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
155168
}
156169

157-
extern void tick_nohz_full_kick(void);
170+
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
171+
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
172+
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
173+
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
174+
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
175+
enum tick_dep_bits bit);
176+
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
177+
enum tick_dep_bits bit);
178+
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
179+
enum tick_dep_bits bit);
180+
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
181+
enum tick_dep_bits bit);
182+
183+
/*
184+
* The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
185+
* on top of static keys.
186+
*/
187+
static inline void tick_dep_set(enum tick_dep_bits bit)
188+
{
189+
if (tick_nohz_full_enabled())
190+
tick_nohz_dep_set(bit);
191+
}
192+
193+
static inline void tick_dep_clear(enum tick_dep_bits bit)
194+
{
195+
if (tick_nohz_full_enabled())
196+
tick_nohz_dep_clear(bit);
197+
}
198+
199+
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
200+
{
201+
if (tick_nohz_full_cpu(cpu))
202+
tick_nohz_dep_set_cpu(cpu, bit);
203+
}
204+
205+
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
206+
{
207+
if (tick_nohz_full_cpu(cpu))
208+
tick_nohz_dep_clear_cpu(cpu, bit);
209+
}
210+
211+
static inline void tick_dep_set_task(struct task_struct *tsk,
212+
enum tick_dep_bits bit)
213+
{
214+
if (tick_nohz_full_enabled())
215+
tick_nohz_dep_set_task(tsk, bit);
216+
}
217+
static inline void tick_dep_clear_task(struct task_struct *tsk,
218+
enum tick_dep_bits bit)
219+
{
220+
if (tick_nohz_full_enabled())
221+
tick_nohz_dep_clear_task(tsk, bit);
222+
}
223+
static inline void tick_dep_set_signal(struct signal_struct *signal,
224+
enum tick_dep_bits bit)
225+
{
226+
if (tick_nohz_full_enabled())
227+
tick_nohz_dep_set_signal(signal, bit);
228+
}
229+
static inline void tick_dep_clear_signal(struct signal_struct *signal,
230+
enum tick_dep_bits bit)
231+
{
232+
if (tick_nohz_full_enabled())
233+
tick_nohz_dep_clear_signal(signal, bit);
234+
}
235+
158236
extern void tick_nohz_full_kick_cpu(int cpu);
159-
extern void tick_nohz_full_kick_all(void);
160237
extern void __tick_nohz_task_switch(void);
161238
#else
162239
static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
166243
static inline bool tick_nohz_full_enabled(void) { return false; }
167244
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
168245
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
246+
247+
static inline void tick_dep_set(enum tick_dep_bits bit) { }
248+
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
249+
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
250+
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
251+
static inline void tick_dep_set_task(struct task_struct *tsk,
252+
enum tick_dep_bits bit) { }
253+
static inline void tick_dep_clear_task(struct task_struct *tsk,
254+
enum tick_dep_bits bit) { }
255+
static inline void tick_dep_set_signal(struct signal_struct *signal,
256+
enum tick_dep_bits bit) { }
257+
static inline void tick_dep_clear_signal(struct signal_struct *signal,
258+
enum tick_dep_bits bit) { }
259+
169260
static inline void tick_nohz_full_kick_cpu(int cpu) { }
170-
static inline void tick_nohz_full_kick(void) { }
171-
static inline void tick_nohz_full_kick_all(void) { }
172261
static inline void __tick_nohz_task_switch(void) { }
173262
#endif
174263

include/trace/events/timer.h

Lines changed: 31 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
328328
);
329329

330330
#ifdef CONFIG_NO_HZ_COMMON
331+
332+
#define TICK_DEP_NAMES \
333+
tick_dep_name(NONE) \
334+
tick_dep_name(POSIX_TIMER) \
335+
tick_dep_name(PERF_EVENTS) \
336+
tick_dep_name(SCHED) \
337+
tick_dep_name_end(CLOCK_UNSTABLE)
338+
339+
#undef tick_dep_name
340+
#undef tick_dep_name_end
341+
342+
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
343+
#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
344+
345+
TICK_DEP_NAMES
346+
347+
#undef tick_dep_name
348+
#undef tick_dep_name_end
349+
350+
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
351+
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
352+
353+
#define show_tick_dep_name(val) \
354+
__print_symbolic(val, TICK_DEP_NAMES)
355+
331356
TRACE_EVENT(tick_stop,
332357

333-
TP_PROTO(int success, char *error_msg),
358+
TP_PROTO(int success, int dependency),
334359

335-
TP_ARGS(success, error_msg),
360+
TP_ARGS(success, dependency),
336361

337362
TP_STRUCT__entry(
338363
__field( int , success )
339-
__string( msg, error_msg )
364+
__field( int , dependency )
340365
),
341366

342367
TP_fast_assign(
343368
__entry->success = success;
344-
__assign_str(msg, error_msg);
369+
__entry->dependency = dependency;
345370
),
346371

347-
TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
372+
TP_printk("success=%d dependency=%s", __entry->success, \
373+
show_tick_dep_name(__entry->dependency))
348374
);
349375
#endif
350376

kernel/events/core.c

Lines changed: 48 additions & 17 deletions
Original file line number · Diff line number · Diff line change
@@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
31123112
return rotate;
31133113
}
31143114

3115-
#ifdef CONFIG_NO_HZ_FULL
3116-
bool perf_event_can_stop_tick(void)
3117-
{
3118-
if (atomic_read(&nr_freq_events) ||
3119-
__this_cpu_read(perf_throttled_count))
3120-
return false;
3121-
else
3122-
return true;
3123-
}
3124-
#endif
3125-
31263115
void perf_event_task_tick(void)
31273116
{
31283117
struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
31333122

31343123
__this_cpu_inc(perf_throttled_seq);
31353124
throttled = __this_cpu_xchg(perf_throttled_count, 0);
3125+
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
31363126

31373127
list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
31383128
perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
35643554
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
35653555
}
35663556

3557+
#ifdef CONFIG_NO_HZ_FULL
3558+
static DEFINE_SPINLOCK(nr_freq_lock);
3559+
#endif
3560+
3561+
static void unaccount_freq_event_nohz(void)
3562+
{
3563+
#ifdef CONFIG_NO_HZ_FULL
3564+
spin_lock(&nr_freq_lock);
3565+
if (atomic_dec_and_test(&nr_freq_events))
3566+
tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
3567+
spin_unlock(&nr_freq_lock);
3568+
#endif
3569+
}
3570+
3571+
static void unaccount_freq_event(void)
3572+
{
3573+
if (tick_nohz_full_enabled())
3574+
unaccount_freq_event_nohz();
3575+
else
3576+
atomic_dec(&nr_freq_events);
3577+
}
3578+
35673579
static void unaccount_event(struct perf_event *event)
35683580
{
35693581
bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
35803592
if (event->attr.task)
35813593
atomic_dec(&nr_task_events);
35823594
if (event->attr.freq)
3583-
atomic_dec(&nr_freq_events);
3595+
unaccount_freq_event();
35843596
if (event->attr.context_switch) {
35853597
dec = true;
35863598
atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
64246436
if (unlikely(throttle
64256437
&& hwc->interrupts >= max_samples_per_tick)) {
64266438
__this_cpu_inc(perf_throttled_count);
6439+
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
64276440
hwc->interrupts = MAX_INTERRUPTS;
64286441
perf_log_throttle(event, 0);
6429-
tick_nohz_full_kick();
64306442
ret = 1;
64316443
}
64326444
}
@@ -7815,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
78157827
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
78167828
}
78177829

7830+
/* Freq events need the tick to stay alive (see perf_event_task_tick). */
7831+
static void account_freq_event_nohz(void)
7832+
{
7833+
#ifdef CONFIG_NO_HZ_FULL
7834+
/* Lock so we don't race with concurrent unaccount */
7835+
spin_lock(&nr_freq_lock);
7836+
if (atomic_inc_return(&nr_freq_events) == 1)
7837+
tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
7838+
spin_unlock(&nr_freq_lock);
7839+
#endif
7840+
}
7841+
7842+
static void account_freq_event(void)
7843+
{
7844+
if (tick_nohz_full_enabled())
7845+
account_freq_event_nohz();
7846+
else
7847+
atomic_inc(&nr_freq_events);
7848+
}
7849+
7850+
78187851
static void account_event(struct perf_event *event)
78197852
{
78207853
bool inc = false;
@@ -7830,10 +7863,8 @@ static void account_event(struct perf_event *event)
78307863
atomic_inc(&nr_comm_events);
78317864
if (event->attr.task)
78327865
atomic_inc(&nr_task_events);
7833-
if (event->attr.freq) {
7834-
if (atomic_inc_return(&nr_freq_events) == 1)
7835-
tick_nohz_full_kick_all();
7836-
}
7866+
if (event->attr.freq)
7867+
account_freq_event();
78377868
if (event->attr.context_switch) {
78387869
atomic_inc(&nr_switch_events);
78397870
inc = true;

0 commit comments

Comments (0)