
Commit 698eff6

Peter Zijlstra authored and Ingo Molnar committed
sched/clock, x86/perf: Fix "perf test tsc"
People reported that commit:

  5680d80 ("sched/clock: Provide better clock continuity")

broke "perf test tsc". That commit added another offset to the reported
clock value; so take that into account when computing the provided
offset values.

Reported-by: Adrian Hunter <adrian.hunter@intel.com>
Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Tested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 5680d80 ("sched/clock: Provide better clock continuity")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 71fdb70 commit 698eff6
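
"perf test tsc" cross-checks kernel-generated timestamps against times it reconstructs in user space from raw TSC values, using the time_zero/time_mult/time_shift fields that arch_perf_update_userpage() publishes in the perf_event_mmap_page. As a reminder of why the published offsets matter, here is a minimal sketch of that user-space conversion, modelled on the tsc_to_perf_time() helper in tools/perf (types and names here are illustrative, not taken from this commit):

#include <stdint.h>

/* Simplified stand-in for the conversion parameters that tools/perf
 * copies out of the perf_event_mmap_page. */
struct tsc_conv {
	uint16_t time_shift;
	uint32_t time_mult;
	uint64_t time_zero;
};

/*
 * Convert a raw TSC value to perf time. The quot/rem split keeps the
 * 64x32-bit multiply from overflowing, following the conversion
 * documented for time_mult/time_shift in the mmap-page ABI.
 */
static uint64_t tsc_to_perf_time(uint64_t cyc, const struct tsc_conv *tc)
{
	uint64_t quot = cyc >> tc->time_shift;
	uint64_t rem  = cyc & (((uint64_t)1 << tc->time_shift) - 1);

	return tc->time_zero + quot * tc->time_mult +
	       ((rem * tc->time_mult) >> tc->time_shift);
}

Because time_zero feeds straight into this sum, leaving __sched_clock_offset out of it makes every reconstructed timestamp fall short of the kernel's local_clock() by that offset, which is the mismatch the test flagged.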

File tree

5 files changed: +28 −22 lines

arch/x86/events/core.c

Lines changed: 6 additions & 3 deletions
@@ -2256,34 +2256,37 @@ void arch_perf_update_userpage(struct perf_event *event,
 			       struct perf_event_mmap_page *userpg, u64 now)
 {
 	struct cyc2ns_data *data;
+	u64 offset;
 
 	userpg->cap_user_time = 0;
 	userpg->cap_user_time_zero = 0;
 	userpg->cap_user_rdpmc =
 		!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
-	if (!sched_clock_stable())
+	if (!using_native_sched_clock() || !sched_clock_stable())
 		return;
 
 	data = cyc2ns_read_begin();
 
+	offset = data->cyc2ns_offset + __sched_clock_offset;
+
 	/*
 	 * Internal timekeeping for enabled/running/stopped times
 	 * is always in the local_clock domain.
 	 */
 	userpg->cap_user_time = 1;
 	userpg->time_mult = data->cyc2ns_mul;
 	userpg->time_shift = data->cyc2ns_shift;
-	userpg->time_offset = data->cyc2ns_offset - now;
+	userpg->time_offset = offset - now;
 
 	/*
	 * cap_user_time_zero doesn't make sense when we're using a different
	 * time base for the records.
	 */
 	if (!event->attr.use_clockid) {
 		userpg->cap_user_time_zero = 1;
-		userpg->time_zero = data->cyc2ns_offset;
+		userpg->time_zero = offset;
 	}
 
 	cyc2ns_read_end(data);
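
The new 'offset' above folds the sched_clock continuity correction into what user space sees. A rough sketch of the arithmetic this hunk relies on, assuming the stable, native-sched_clock case that survives the new early return (cyc2ns() and the two offsets stand in for the kernel values of the same names; this is an illustration, not kernel code):

#include <stdint.h>

/*
 * With a stable native sched_clock:
 *   sched_clock() == cyc2ns(tsc) + cyc2ns_offset
 *   local_clock() == sched_clock() + __sched_clock_offset
 * so mapping a TSC value into the local_clock() domain needs the sum of
 * both offsets.
 */
static uint64_t local_clock_from_tsc(uint64_t cyc2ns_of_tsc,
				     uint64_t cyc2ns_offset,
				     uint64_t sched_clock_offset)
{
	uint64_t offset = cyc2ns_offset + sched_clock_offset;

	/* 'offset' is what lands in time_zero above, and 'offset - now'
	 * in time_offset. */
	return cyc2ns_of_tsc + offset;
}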

arch/x86/include/asm/timer.h

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
 
+extern bool using_native_sched_clock(void);
+
 /*
  * We use the full linear equation: f(x) = a + b*x, in order to allow
  * a continuous function in the face of dynamic freq changes.

arch/x86/kernel/tsc.c

Lines changed: 2 additions & 2 deletions
@@ -328,15 +328,15 @@ unsigned long long sched_clock(void)
 	return paravirt_sched_clock();
 }
 
-static inline bool using_native_sched_clock(void)
+bool using_native_sched_clock(void)
 {
 	return pv_time_ops.sched_clock == native_sched_clock;
 }
 #else
 unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
 
-static inline bool using_native_sched_clock(void) { return true; }
+bool using_native_sched_clock(void) { return true; }
 #endif
 
 int check_tsc_unstable(void)

include/linux/sched/clock.h

Lines changed: 7 additions & 6 deletions
@@ -54,15 +54,16 @@ static inline u64 local_clock(void)
 }
 #else
 extern void sched_clock_init_late(void);
-/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
- */
 extern int sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
+/*
+ * When sched_clock_stable(), __sched_clock_offset provides the offset
+ * between local_clock() and sched_clock().
+ */
+extern u64 __sched_clock_offset;
+
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);

kernel/sched/clock.c

Lines changed: 11 additions & 11 deletions
@@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
 static int __sched_clock_stable_early = 1;
 
 /*
- * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
 */
-static __read_mostly u64 raw_offset;
-static __read_mostly u64 gtod_offset;
+__read_mostly u64 __sched_clock_offset;
+static __read_mostly u64 __gtod_offset;
 
 struct sched_clock_data {
 	u64			tick_raw;
@@ -131,11 +131,11 @@ static void __set_sched_clock_stable(void)
 	/*
 	 * Attempt to make the (initial) unstable->stable transition continuous.
 	 */
-	raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+	__sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
 
 	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
-			scd->tick_gtod, gtod_offset,
-			scd->tick_raw,  raw_offset);
+			scd->tick_gtod, __gtod_offset,
+			scd->tick_raw,  __sched_clock_offset);
 
 	static_branch_enable(&__sched_clock_stable);
 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
@@ -161,11 +161,11 @@ static void __clear_sched_clock_stable(void)
 	 *
 	 * Still do what we can.
 	 */
-	gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+	__gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
 
 	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
-			scd->tick_gtod, gtod_offset,
-			scd->tick_raw, raw_offset);
+			scd->tick_gtod, __gtod_offset,
+			scd->tick_raw, __sched_clock_offset);
 
 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 
@@ -238,7 +238,7 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
 	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
-	clock = scd->tick_gtod + gtod_offset + delta;
+	clock = scd->tick_gtod + __gtod_offset + delta;
 	min_clock = wrap_max(scd->tick_gtod, old_clock);
 	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
 
@@ -324,7 +324,7 @@ u64 sched_clock_cpu(int cpu)
 	u64 clock;
 
 	if (sched_clock_stable())
-		return sched_clock() + raw_offset;
+		return sched_clock() + __sched_clock_offset;
 
 	if (unlikely(!sched_clock_running))
 		return 0ull;
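
For reference, the renamed offsets implement the invariant stated in the comment above: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset. A standalone sketch of how the unstable->stable transition picks __sched_clock_offset so the reported clock does not jump at the switch (field names mirror the kernel's struct sched_clock_data, but this is an illustration, not the kernel code):

#include <stdint.h>

/* Per-CPU snapshot taken at the tick, mirroring the relevant fields of
 * the kernel's struct sched_clock_data. */
struct scd_snapshot {
	uint64_t tick_raw;	/* sched_clock() sampled at the last tick */
	uint64_t tick_gtod;	/* ktime_get_ns() sampled at the last tick */
};

/*
 * Before the switch the clock follows the GTOD-based path, roughly
 * tick_gtod + __gtod_offset + delta; afterwards it is
 * sched_clock() + __sched_clock_offset. Choosing the offset as below
 * makes the two agree at the moment of the transition.
 */
static uint64_t pick_sched_clock_offset(const struct scd_snapshot *scd,
					uint64_t gtod_offset)
{
	return (scd->tick_gtod + gtod_offset) - scd->tick_raw;
}

The stable fast path in sched_clock_cpu() then reduces to sched_clock() + __sched_clock_offset, which is why arch_perf_update_userpage() now has to export that same offset alongside cyc2ns_offset.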
