Skip to content

Commit d027d45

Browse files
committed
nohz: New tick dependency mask
The tick dependency is evaluated on every IRQ and context switch. This consists of a batch of checks which determine whether it is safe to stop the tick or not. These checks are often split into many details: posix cpu timers, scheduler, sched clock, perf events.... each of which is made of smaller details: posix cpu timers involve checking process wide timers then thread wide timers. Perf involves checking freq events then more per cpu details. Checking this information asynchronously every time we update the full dynticks state brings avoidable overhead and a messy layout. Let's introduce instead tick dependency masks: one for system wide dependencies (unstable sched clock, freq based perf events), one for CPU wide dependencies (sched, throttling perf events), and task/signal level dependencies (posix cpu timers). The subsystems are responsible for setting and clearing their dependency through a set of APIs that will take care of concurrent dependency mask modifications and kick targets to restart the relevant CPU tick whenever needed. This new dependency engine stays beside the old one until all subsystems having a tick dependency are converted to it. Suggested-by: Thomas Gleixner <tglx@linutronix.de> Suggested-by: Peter Zijlstra <peterz@infradead.org> Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com> Cc: Christoph Lameter <cl@linux.com> Cc: Chris Metcalf <cmetcalf@ezchip.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Luiz Capitulino <lcapitulino@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rik van Riel <riel@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
1 parent 8537bb9 commit d027d45

File tree

4 files changed

+244
-7
lines changed

4 files changed

+244
-7
lines changed

include/linux/sched.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,10 @@ struct signal_struct {
719719
/* Earliest-expiration cache. */
720720
struct task_cputime cputime_expires;
721721

722+
#ifdef CONFIG_NO_HZ_FULL
723+
unsigned long tick_dep_mask;
724+
#endif
725+
722726
struct list_head cpu_timers[3];
723727

724728
struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
15421546
VTIME_SYS,
15431547
} vtime_snap_whence;
15441548
#endif
1549+
1550+
#ifdef CONFIG_NO_HZ_FULL
1551+
unsigned long tick_dep_mask;
1552+
#endif
15451553
unsigned long nvcsw, nivcsw; /* context switch counts */
15461554
u64 start_time; /* monotonic time in nsec */
15471555
u64 real_start_time; /* boot based time in nsec */

include/linux/tick.h

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,18 @@ static inline void tick_broadcast_exit(void)
9797
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
9898
}
9999

100+
/*
 * Reasons a CPU may need to keep its periodic tick, one bit per
 * subsystem. The matching TICK_DEP_MASK_* values are the bit masks
 * tested against the global/per-CPU/per-task dependency words.
 */
enum tick_dep_bits {
	TICK_DEP_BIT_POSIX_TIMER	= 0,	/* posix cpu timers pending */
	TICK_DEP_BIT_PERF_EVENTS	= 1,	/* perf events need the tick */
	TICK_DEP_BIT_SCHED		= 2,	/* scheduler: >1 runnable task */
	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3	/* sched clock is unstable */
};

#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
111+
100112
#ifdef CONFIG_NO_HZ_COMMON
101113
extern int tick_nohz_enabled;
102114
extern int tick_nohz_tick_stopped(void);
@@ -154,6 +166,72 @@ static inline int housekeeping_any_cpu(void)
154166
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
155167
}
156168

169+
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
170+
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
171+
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
172+
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
173+
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
174+
enum tick_dep_bits bit);
175+
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
176+
enum tick_dep_bits bit);
177+
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
178+
enum tick_dep_bits bit);
179+
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
180+
enum tick_dep_bits bit);
181+
182+
/*
183+
* The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
184+
* on top of static keys.
185+
*/
186+
/* Set a global tick dependency; no-op when nohz_full is inactive. */
static inline void tick_dep_set(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;
	tick_nohz_dep_set(bit);
}
191+
192+
/* Clear a global tick dependency; no-op when nohz_full is inactive. */
static inline void tick_dep_clear(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;
	tick_nohz_dep_clear(bit);
}
197+
198+
/* Set a per-CPU tick dependency; only matters for nohz_full CPUs. */
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;
	tick_nohz_dep_set_cpu(cpu, bit);
}
203+
204+
/* Clear a per-CPU tick dependency; only matters for nohz_full CPUs. */
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;
	tick_nohz_dep_clear_cpu(cpu, bit);
}
209+
210+
static inline void tick_dep_set_task(struct task_struct *tsk,
211+
enum tick_dep_bits bit)
212+
{
213+
if (tick_nohz_full_enabled())
214+
tick_nohz_dep_set_task(tsk, bit);
215+
}
216+
static inline void tick_dep_clear_task(struct task_struct *tsk,
217+
enum tick_dep_bits bit)
218+
{
219+
if (tick_nohz_full_enabled())
220+
tick_nohz_dep_clear_task(tsk, bit);
221+
}
222+
static inline void tick_dep_set_signal(struct signal_struct *signal,
223+
enum tick_dep_bits bit)
224+
{
225+
if (tick_nohz_full_enabled())
226+
tick_nohz_dep_set_signal(signal, bit);
227+
}
228+
static inline void tick_dep_clear_signal(struct signal_struct *signal,
229+
enum tick_dep_bits bit)
230+
{
231+
if (tick_nohz_full_enabled())
232+
tick_nohz_dep_clear_signal(signal, bit);
233+
}
234+
157235
extern void tick_nohz_full_kick(void);
158236
extern void tick_nohz_full_kick_cpu(int cpu);
159237
extern void tick_nohz_full_kick_all(void);
@@ -166,6 +244,20 @@ static inline int housekeeping_any_cpu(void)
166244
static inline bool tick_nohz_full_enabled(void) { return false; }
167245
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
168246
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
247+
248+
/* CONFIG_NO_HZ_FULL=n: every tick dependency operation compiles to a no-op. */
static inline void tick_dep_set(enum tick_dep_bits bit) { }
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_set_task(struct task_struct *tsk,
				     enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
				       enum tick_dep_bits bit) { }
static inline void tick_dep_set_signal(struct signal_struct *signal,
				       enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
					 enum tick_dep_bits bit) { }
260+
169261
static inline void tick_nohz_full_kick_cpu(int cpu) { }
170262
static inline void tick_nohz_full_kick(void) { }
171263
static inline void tick_nohz_full_kick_all(void) { }

kernel/time/tick-sched.c

Lines changed: 143 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,53 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
158158
cpumask_var_t tick_nohz_full_mask;
159159
cpumask_var_t housekeeping_mask;
160160
bool tick_nohz_full_running;
161+
static unsigned long tick_dep_mask;
161162

162-
static bool can_stop_full_tick(void)
163+
static void trace_tick_dependency(unsigned long dep)
164+
{
165+
if (dep & TICK_DEP_MASK_POSIX_TIMER) {
166+
trace_tick_stop(0, "posix timers running\n");
167+
return;
168+
}
169+
170+
if (dep & TICK_DEP_MASK_PERF_EVENTS) {
171+
trace_tick_stop(0, "perf events running\n");
172+
return;
173+
}
174+
175+
if (dep & TICK_DEP_MASK_SCHED) {
176+
trace_tick_stop(0, "more than 1 task in runqueue\n");
177+
return;
178+
}
179+
180+
if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
181+
trace_tick_stop(0, "unstable sched clock\n");
182+
}
183+
184+
static bool can_stop_full_tick(struct tick_sched *ts)
163185
{
164186
WARN_ON_ONCE(!irqs_disabled());
165187

188+
if (tick_dep_mask) {
189+
trace_tick_dependency(tick_dep_mask);
190+
return false;
191+
}
192+
193+
if (ts->tick_dep_mask) {
194+
trace_tick_dependency(ts->tick_dep_mask);
195+
return false;
196+
}
197+
198+
if (current->tick_dep_mask) {
199+
trace_tick_dependency(current->tick_dep_mask);
200+
return false;
201+
}
202+
203+
if (current->signal->tick_dep_mask) {
204+
trace_tick_dependency(current->signal->tick_dep_mask);
205+
return false;
206+
}
207+
166208
if (!sched_can_stop_tick()) {
167209
trace_tick_stop(0, "more than 1 task in runqueue\n");
168210
return false;
@@ -178,9 +220,10 @@ static bool can_stop_full_tick(void)
178220
return false;
179221
}
180222

181-
/* sched_clock_tick() needs us? */
182223
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
183224
/*
225+
* sched_clock_tick() needs us?
226+
*
184227
* TODO: kick full dynticks CPUs when
185228
* sched_clock_stable is set.
186229
*/
@@ -199,13 +242,13 @@ static bool can_stop_full_tick(void)
199242
return true;
200243
}
201244

202-
static void nohz_full_kick_work_func(struct irq_work *work)
245+
static void nohz_full_kick_func(struct irq_work *work)
203246
{
204247
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
205248
}
206249

207250
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
208-
.func = nohz_full_kick_work_func,
251+
.func = nohz_full_kick_func,
209252
};
210253

211254
/*
@@ -251,6 +294,95 @@ void tick_nohz_full_kick_all(void)
251294
preempt_enable();
252295
}
253296

297+
static void tick_nohz_dep_set_all(unsigned long *dep,
298+
enum tick_dep_bits bit)
299+
{
300+
unsigned long prev;
301+
302+
prev = fetch_or(dep, BIT_MASK(bit));
303+
if (!prev)
304+
tick_nohz_full_kick_all();
305+
}
306+
307+
/*
308+
* Set a global tick dependency. Used by perf events that rely on freq and
309+
* by unstable clock.
310+
*/
311+
void tick_nohz_dep_set(enum tick_dep_bits bit)
312+
{
313+
tick_nohz_dep_set_all(&tick_dep_mask, bit);
314+
}
315+
316+
void tick_nohz_dep_clear(enum tick_dep_bits bit)
317+
{
318+
clear_bit(bit, &tick_dep_mask);
319+
}
320+
321+
/*
322+
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
323+
* manage events throttling.
324+
*/
325+
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
326+
{
327+
unsigned long prev;
328+
struct tick_sched *ts;
329+
330+
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
331+
332+
prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
333+
if (!prev) {
334+
preempt_disable();
335+
/* Perf needs local kick that is NMI safe */
336+
if (cpu == smp_processor_id()) {
337+
tick_nohz_full_kick();
338+
} else {
339+
/* Remote irq work not NMI-safe */
340+
if (!WARN_ON_ONCE(in_nmi()))
341+
tick_nohz_full_kick_cpu(cpu);
342+
}
343+
preempt_enable();
344+
}
345+
}
346+
347+
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
348+
{
349+
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
350+
351+
clear_bit(bit, &ts->tick_dep_mask);
352+
}
353+
354+
/*
355+
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
356+
* per task timers.
357+
*/
358+
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
359+
{
360+
/*
361+
* We could optimize this with just kicking the target running the task
362+
* if that noise matters for nohz full users.
363+
*/
364+
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
365+
}
366+
367+
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
368+
{
369+
clear_bit(bit, &tsk->tick_dep_mask);
370+
}
371+
372+
/*
373+
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
374+
* per process timers.
375+
*/
376+
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
377+
{
378+
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
379+
}
380+
381+
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
382+
{
383+
clear_bit(bit, &sig->tick_dep_mask);
384+
}
385+
254386
/*
255387
* Re-evaluate the need for the tick as we switch the current task.
256388
* It might need the tick due to per task/process properties:
@@ -259,15 +391,19 @@ void tick_nohz_full_kick_all(void)
259391
void __tick_nohz_task_switch(void)
260392
{
261393
unsigned long flags;
394+
struct tick_sched *ts;
262395

263396
local_irq_save(flags);
264397

265398
if (!tick_nohz_full_cpu(smp_processor_id()))
266399
goto out;
267400

268-
if (tick_nohz_tick_stopped() && !can_stop_full_tick())
269-
tick_nohz_full_kick();
401+
ts = this_cpu_ptr(&tick_cpu_sched);
270402

403+
if (ts->tick_stopped) {
404+
if (current->tick_dep_mask || current->signal->tick_dep_mask)
405+
tick_nohz_full_kick();
406+
}
271407
out:
272408
local_irq_restore(flags);
273409
}
@@ -736,7 +872,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
736872
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
737873
return;
738874

739-
if (can_stop_full_tick())
875+
if (can_stop_full_tick(ts))
740876
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
741877
else if (ts->tick_stopped)
742878
tick_nohz_restart_sched_tick(ts, ktime_get(), 1);

kernel/time/tick-sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct tick_sched {
6060
u64 next_timer;
6161
ktime_t idle_expires;
6262
int do_timer_last;
63+
unsigned long tick_dep_mask;
6364
};
6465

6566
extern struct tick_sched *tick_get_tick_sched(int cpu);

0 commit comments

Comments
 (0)