
Commit 5743021

Rik van Riel authored, Ingo Molnar committed
sched/cputime: Count actually elapsed irq & softirq time
Currently, if there was any irq or softirq time during 'ticks'
jiffies, the entire period will be accounted as irq or softirq
time.

This is inaccurate if only a subset of the time was actually spent
handling irqs, and could conceivably mis-count all of the ticks during
a period as irq time, when there was some irq and some softirq time.

This can actually happen when irqtime_account_process_tick() is called
from account_idle_ticks(), which can pass a larger number of ticks down
all at once.

Fix this by changing irqtime_account_hi_update(), irqtime_account_si_update(),
and steal_account_process_ticks() to work with cputime_t time units, and
return the amount of time spent in each mode.

Rename steal_account_process_ticks() to steal_account_process_time(), to
reflect that time is now accounted in cputime_t, instead of ticks.

Additionally, have irqtime_account_process_tick() take into account how
much time was spent in each of steal, irq, and softirq time.

The latter could help improve the accuracy of cputime accounting when
returning from idle on a NO_HZ_IDLE CPU.

Properly accounting how much time was spent in hardirq and softirq time
will also allow the NO_HZ_FULL code to re-use these same functions for
hardirq and softirq accounting.

Signed-off-by: Rik van Riel <riel@redhat.com>
[ Make nsecs_to_cputime64() actually return cputime64_t. ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1468421405-20056-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent cefef3a commit 5743021
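By way of illustration (an editorial sketch of the idea, not code from this
patch; account_capped, snap, period and min_u64 are all hypothetical names),
the fix amounts to accounting each component's not-yet-accounted delta,
capped at whatever budget remains. A minimal userspace model:

	#include <stdio.h>

	typedef unsigned long long u64;

	static u64 min_u64(u64 a, u64 b) { return a < b ? a : b; }

	/*
	 * Account the pending delta of one component (steal, irq or
	 * softirq), but never more than the 'maxtime' budget left.
	 */
	static u64 account_capped(u64 elapsed, u64 *snap, u64 maxtime)
	{
		u64 delta = min_u64(elapsed - *snap, maxtime);

		*snap += delta;	/* leftover stays pending for next time */
		return delta;
	}

	int main(void)
	{
		u64 period = 10;		/* ticks accounted at idle exit */
		u64 irq = 3, softirq = 2;	/* ticks elapsed in each mode */
		u64 irq_snap = 0, si_snap = 0;	/* already-accounted snapshots */
		u64 other = 0;

		other += account_capped(irq, &irq_snap, period - other);
		other += account_capped(softirq, &si_snap, period - other);

		/*
		 * Old scheme: the boolean hi/si updates attributed all
		 * 10 ticks to irq time. New scheme: 5 ticks become
		 * irq+softirq, 5 remain for idle, user or system time.
		 */
		printf("other=%llu remaining=%llu\n", other, period - other);
		return 0;
	}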

File tree: 2 files changed (+79, −47 lines)


include/asm-generic/cputime_nsecs.h

Lines changed: 2 additions & 0 deletions
@@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t;
 	(__force u64)(__ct)
 #define nsecs_to_cputime(__nsecs)	\
 	(__force cputime_t)(__nsecs)
+#define nsecs_to_cputime64(__nsecs)	\
+	(__force cputime64_t)(__nsecs)
 
 
 /*
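Aside (an observation, not part of the patch): in this generic nsecs-backed
implementation, cputime_t already counts nanoseconds, so the new macro is a
type-safe no-op cast, exactly parallel to the existing nsecs_to_cputime():

	u64 ns = this_cpu_read(cpu_hardirq_time);	/* raw nanoseconds */
	cputime64_t ct = nsecs_to_cputime64(ns);	/* same value, cputime64_t type */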

kernel/sched/cputime.c

Lines changed: 77 additions & 47 deletions
@@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr)
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-static int irqtime_account_hi_update(void)
+static cputime_t irqtime_account_hi_update(cputime_t maxtime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	cputime_t irq_cputime;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_hardirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
-		ret = 1;
+	irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
+		      cpustat[CPUTIME_IRQ];
+	irq_cputime = min(irq_cputime, maxtime);
+	cpustat[CPUTIME_IRQ] += irq_cputime;
 	local_irq_restore(flags);
-	return ret;
+	return irq_cputime;
 }
 
-static int irqtime_account_si_update(void)
+static cputime_t irqtime_account_si_update(cputime_t maxtime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	cputime_t softirq_cputime;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_softirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
-		ret = 1;
+	softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
+			  cpustat[CPUTIME_SOFTIRQ];
+	softirq_cputime = min(softirq_cputime, maxtime);
+	cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
 	local_irq_restore(flags);
-	return ret;
+	return softirq_cputime;
 }
 
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #define sched_clock_irqtime	(0)
 
+static cputime_t irqtime_account_hi_update(cputime_t dummy)
+{
+	return 0;
+}
+
+static cputime_t irqtime_account_si_update(cputime_t dummy)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
 static inline void task_group_account_field(struct task_struct *p, int index,
@@ -257,31 +267,44 @@ void account_idle_time(cputime_t cputime)
 	cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
-static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies)
+static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
 	if (static_key_false(&paravirt_steal_enabled)) {
+		cputime_t steal_cputime;
 		u64 steal;
-		unsigned long steal_jiffies;
 
 		steal = paravirt_steal_clock(smp_processor_id());
 		steal -= this_rq()->prev_steal_time;
 
-		/*
-		 * steal is in nsecs but our caller is expecting steal
-		 * time in jiffies. Lets cast the result to jiffies
-		 * granularity and account the rest on the next rounds.
-		 */
-		steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
-		this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
+		steal_cputime = min(nsecs_to_cputime(steal), maxtime);
+		account_steal_time(steal_cputime);
+		this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
 
-		account_steal_time(jiffies_to_cputime(steal_jiffies));
-		return steal_jiffies;
+		return steal_cputime;
 	}
 #endif
 	return 0;
 }
 
+/*
+ * Account how much elapsed time was spent in steal, irq, or softirq time.
+ */
+static inline cputime_t account_other_time(cputime_t max)
+{
+	cputime_t accounted;
+
+	accounted = steal_account_process_time(max);
+
+	if (accounted < max)
+		accounted += irqtime_account_hi_update(max - accounted);
+
+	if (accounted < max)
+		accounted += irqtime_account_si_update(max - accounted);
+
+	return accounted;
+}
+
 /*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
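Note the fixed drain order in account_other_time() above (an observation,
not from the changelog): steal is accounted first, then hardirq, then
softirq, each capped to whatever budget the earlier components left. For
instance, with max of 4 ticks, 3 ticks of pending steal and 2 of pending
hardirq time, it accounts 3 ticks of steal plus 1 of irq and returns 4; the
leftover irq time stays pending, since cpustat[CPUTIME_IRQ] and
prev_steal_time only advance by the amount actually accounted.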
@@ -342,21 +365,23 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 					 struct rq *rq, int ticks)
 {
-	cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
-	u64 cputime = (__force u64) cputime_one_jiffy;
-	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	u64 cputime = (__force u64) cputime_one_jiffy * ticks;
+	cputime_t scaled, other;
 
-	if (steal_account_process_tick(ULONG_MAX))
+	/*
+	 * When returning from idle, many ticks can get accounted at
+	 * once, including some ticks of steal, irq, and softirq time.
+	 * Subtract those ticks from the amount of time accounted to
+	 * idle, or potentially user or system time. Due to rounding,
+	 * other time can exceed ticks occasionally.
+	 */
+	other = account_other_time(cputime);
+	if (other >= cputime)
 		return;
+	cputime -= other;
+	scaled = cputime_to_scaled(cputime);
 
-	cputime *= ticks;
-	scaled *= ticks;
-
-	if (irqtime_account_hi_update()) {
-		cpustat[CPUTIME_IRQ] += cputime;
-	} else if (irqtime_account_si_update()) {
-		cpustat[CPUTIME_SOFTIRQ] += cputime;
-	} else if (this_cpu_ksoftirqd() == p) {
+	if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
 		 * So, we have to handle it separately here.
@@ -466,7 +491,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+	cputime_t cputime, scaled, steal;
 	struct rq *rq = this_rq();
 
 	if (vtime_accounting_cpu_enabled())
@@ -477,16 +502,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
 		return;
 	}
 
-	if (steal_account_process_tick(ULONG_MAX))
+	cputime = cputime_one_jiffy;
+	steal = steal_account_process_time(cputime);
+
+	if (steal >= cputime)
 		return;
 
+	cputime -= steal;
+	scaled = cputime_to_scaled(cputime);
+
 	if (user_tick)
-		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+		account_user_time(p, cputime, scaled);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
-				    one_jiffy_scaled);
+		account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
 	else
-		account_idle_time(cputime_one_jiffy);
+		account_idle_time(cputime);
 }
 
 /*
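Worth spelling out (an observation, not from the changelog): the old code
dropped the whole tick from user/system/idle whenever
steal_account_process_tick() reported at least one jiffy of steal, while
sub-jiffy steal was deferred entirely by the jiffy rounding. The new code
accounts steal at cputime granularity, capped at one jiffy, and subtracts
only the stolen portion: with HZ=100 (10 ms ticks), 4 ms of pending steal
now leaves 6 ms to attribute to user, system, or idle time.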
@@ -681,14 +711,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
 	unsigned long now = READ_ONCE(jiffies);
-	unsigned long delta_jiffies, steal_jiffies;
+	cputime_t delta, steal;
 
-	delta_jiffies = now - tsk->vtime_snap;
-	steal_jiffies = steal_account_process_tick(delta_jiffies);
+	delta = jiffies_to_cputime(now - tsk->vtime_snap);
+	steal = steal_account_process_time(delta);
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
 	tsk->vtime_snap = now;
 
-	return jiffies_to_cputime(delta_jiffies - steal_jiffies);
+	return delta - steal;
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
