Commit 034bda1

Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar:
 "Two main changes:

   - Cleanups, simplifications and CLOCK_TAI support (Thomas Gleixner)

   - Improve code generation (Andy Lutomirski)"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Rearrange do_hres() to improve code generation
  x86/vdso: Document vgtod_ts better
  x86/vdso: Remove "memory" clobbers in the vDSO syscall fallbacks
  x66/vdso: Add CLOCK_TAI support
  x86/vdso: Move cycle_last handling into the caller
  x86/vdso: Simplify the invalid vclock case
  x86/vdso: Replace the clockid switch case
  x86/vdso: Collapse coarse functions
  x86/vdso: Collapse high resolution functions
  x86/vdso: Introduce and use vgtod_ts
  x86/vdso: Use unsigned int consistently for vsyscall_gtod_data::seq
  x86/vdso: Enforce 64bit clocksource
  x86/time: Implement clocksource_arch_init()
  clocksource: Provide clocksource_arch_init()
2 parents d82924c + 99c19e6 commit 034bda1

8 files changed, 155 insertions(+), 193 deletions(-)
arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions

@@ -48,6 +48,7 @@ config X86
 	select ACPI_SYSTEM_POWER_STATES_SUPPORT	if ACPI
 	select ANON_INODES
 	select ARCH_CLOCKSOURCE_DATA
+	select ARCH_CLOCKSOURCE_INIT
 	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
 	select ARCH_HAS_DEBUG_VIRTUAL

arch/x86/entry/vdso/vclock_gettime.c

Lines changed: 60 additions & 150 deletions

@@ -45,21 +45,10 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 	long ret;
 	asm ("syscall" : "=a" (ret), "=m" (*ts) :
 	     "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
-	     "memory", "rcx", "r11");
+	     "rcx", "r11");
 	return ret;
 }
 
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
-	long ret;
-
-	asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
-	     "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
-	     "memory", "rcx", "r11");
-	return ret;
-}
-
-
 #else
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@@ -73,22 +62,7 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 		"mov %%edx, %%ebx \n"
 		: "=a" (ret), "=m" (*ts)
 		: "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
-		: "memory", "edx");
-	return ret;
-}
-
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
-	long ret;
-
-	asm (
-		"mov %%ebx, %%edx \n"
-		"mov %[tv], %%ebx \n"
-		"call __kernel_vsyscall \n"
-		"mov %%edx, %%ebx \n"
-		: "=a" (ret), "=m" (*tv), "=m" (*tz)
-		: "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
-		: "memory", "edx");
+		: "edx");
 	return ret;
 }
 
@@ -100,12 +74,11 @@ static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
 	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
 }
 
-static notrace u64 vread_pvclock(int *mode)
+static notrace u64 vread_pvclock(void)
 {
 	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
-	u64 ret;
-	u64 last;
 	u32 version;
+	u64 ret;
 
 	/*
 	 * Note: The kernel and hypervisor must guarantee that cpu ID
@@ -132,175 +105,112 @@ static notrace u64 vread_pvclock(int *mode)
 	do {
 		version = pvclock_read_begin(pvti);
 
-		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
-			*mode = VCLOCK_NONE;
-			return 0;
-		}
+		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
+			return U64_MAX;
 
 		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
 	} while (pvclock_read_retry(pvti, version));
 
-	/* refer to vread_tsc() comment for rationale */
-	last = gtod->cycle_last;
-
-	if (likely(ret >= last))
-		return ret;
-
-	return last;
+	return ret;
 }
 #endif
 #ifdef CONFIG_HYPERV_TSCPAGE
-static notrace u64 vread_hvclock(int *mode)
+static notrace u64 vread_hvclock(void)
 {
 	const struct ms_hyperv_tsc_page *tsc_pg =
 		(const struct ms_hyperv_tsc_page *)&hvclock_page;
-	u64 current_tick = hv_read_tsc_page(tsc_pg);
-
-	if (current_tick != U64_MAX)
-		return current_tick;
 
-	*mode = VCLOCK_NONE;
-	return 0;
+	return hv_read_tsc_page(tsc_pg);
 }
 #endif
 
-notrace static u64 vread_tsc(void)
+notrace static inline u64 vgetcyc(int mode)
 {
-	u64 ret = (u64)rdtsc_ordered();
-	u64 last = gtod->cycle_last;
-
-	if (likely(ret >= last))
-		return ret;
-
-	/*
-	 * GCC likes to generate cmov here, but this branch is extremely
-	 * predictable (it's just a function of time and the likely is
-	 * very likely) and there's a data dependence, so force GCC
-	 * to generate a branch instead. I don't barrier() because
-	 * we don't actually need a barrier, and if this function
-	 * ever gets inlined it will generate worse code.
-	 */
-	asm volatile ("");
-	return last;
-}
-
-notrace static inline u64 vgetsns(int *mode)
-{
-	u64 v;
-	cycles_t cycles;
-
-	if (gtod->vclock_mode == VCLOCK_TSC)
-		cycles = vread_tsc();
+	if (mode == VCLOCK_TSC)
+		return (u64)rdtsc_ordered();
 #ifdef CONFIG_PARAVIRT_CLOCK
-	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
-		cycles = vread_pvclock(mode);
+	else if (mode == VCLOCK_PVCLOCK)
+		return vread_pvclock();
 #endif
 #ifdef CONFIG_HYPERV_TSCPAGE
-	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
-		cycles = vread_hvclock(mode);
+	else if (mode == VCLOCK_HVCLOCK)
+		return vread_hvclock();
 #endif
-	else
-		return 0;
-	v = (cycles - gtod->cycle_last) & gtod->mask;
-	return v * gtod->mult;
+	return U64_MAX;
 }
 
-/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
-notrace static int __always_inline do_realtime(struct timespec *ts)
+notrace static int do_hres(clockid_t clk, struct timespec *ts)
 {
-	unsigned long seq;
-	u64 ns;
-	int mode;
+	struct vgtod_ts *base = &gtod->basetime[clk];
+	u64 cycles, last, sec, ns;
+	unsigned int seq;
 
 	do {
 		seq = gtod_read_begin(gtod);
-		mode = gtod->vclock_mode;
-		ts->tv_sec = gtod->wall_time_sec;
-		ns = gtod->wall_time_snsec;
-		ns += vgetsns(&mode);
+		cycles = vgetcyc(gtod->vclock_mode);
+		ns = base->nsec;
+		last = gtod->cycle_last;
+		if (unlikely((s64)cycles < 0))
+			return vdso_fallback_gettime(clk, ts);
+		if (cycles > last)
+			ns += (cycles - last) * gtod->mult;
 		ns >>= gtod->shift;
+		sec = base->sec;
 	} while (unlikely(gtod_read_retry(gtod, seq)));
 
-	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	/*
+	 * Do this outside the loop: a race inside the loop could result
+	 * in __iter_div_u64_rem() being extremely slow.
	 */
+	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
 	ts->tv_nsec = ns;
 
-	return mode;
+	return 0;
 }
 
-notrace static int __always_inline do_monotonic(struct timespec *ts)
+notrace static void do_coarse(clockid_t clk, struct timespec *ts)
 {
-	unsigned long seq;
-	u64 ns;
-	int mode;
+	struct vgtod_ts *base = &gtod->basetime[clk];
+	unsigned int seq;
 
 	do {
 		seq = gtod_read_begin(gtod);
-		mode = gtod->vclock_mode;
-		ts->tv_sec = gtod->monotonic_time_sec;
-		ns = gtod->monotonic_time_snsec;
-		ns += vgetsns(&mode);
-		ns >>= gtod->shift;
+		ts->tv_sec = base->sec;
+		ts->tv_nsec = base->nsec;
 	} while (unlikely(gtod_read_retry(gtod, seq)));
-
-	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-	ts->tv_nsec = ns;
-
-	return mode;
 }
 
-notrace static void do_realtime_coarse(struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-	unsigned long seq;
-	do {
-		seq = gtod_read_begin(gtod);
-		ts->tv_sec = gtod->wall_time_coarse_sec;
-		ts->tv_nsec = gtod->wall_time_coarse_nsec;
-	} while (unlikely(gtod_read_retry(gtod, seq)));
-}
+	unsigned int msk;
 
-notrace static void do_monotonic_coarse(struct timespec *ts)
-{
-	unsigned long seq;
-	do {
-		seq = gtod_read_begin(gtod);
-		ts->tv_sec = gtod->monotonic_time_coarse_sec;
-		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
-	} while (unlikely(gtod_read_retry(gtod, seq)));
-}
+	/* Sort out negative (CPU/FD) and invalid clocks */
+	if (unlikely((unsigned int) clock >= MAX_CLOCKS))
+		return vdso_fallback_gettime(clock, ts);
 
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
-{
-	switch (clock) {
-	case CLOCK_REALTIME:
-		if (do_realtime(ts) == VCLOCK_NONE)
-			goto fallback;
-		break;
-	case CLOCK_MONOTONIC:
-		if (do_monotonic(ts) == VCLOCK_NONE)
-			goto fallback;
-		break;
-	case CLOCK_REALTIME_COARSE:
-		do_realtime_coarse(ts);
-		break;
-	case CLOCK_MONOTONIC_COARSE:
-		do_monotonic_coarse(ts);
-		break;
-	default:
-		goto fallback;
+	/*
	 * Convert the clockid to a bitmask and use it to check which
	 * clocks are handled in the VDSO directly.
	 */
+	msk = 1U << clock;
+	if (likely(msk & VGTOD_HRES)) {
+		return do_hres(clock, ts);
+	} else if (msk & VGTOD_COARSE) {
+		do_coarse(clock, ts);
+		return 0;
 	}
-
-	return 0;
-fallback:
 	return vdso_fallback_gettime(clock, ts);
 }
+
 int clock_gettime(clockid_t, struct timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	if (likely(tv != NULL)) {
-		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
-			return vdso_fallback_gtod(tv, tz);
+		struct timespec *ts = (struct timespec *) tv;
+
+		do_hres(CLOCK_REALTIME, ts);
 		tv->tv_usec /= 1000;
 	}
 	if (unlikely(tz != NULL)) {
@@ -320,7 +230,7 @@ int gettimeofday(struct timeval *, struct timezone *)
 notrace time_t __vdso_time(time_t *t)
 {
 	/* This is atomic on x86 so we don't need any locks. */
-	time_t result = READ_ONCE(gtod->wall_time_sec);
+	time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
 
 	if (t)
 		*t = result;
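Stripped of the vDSO specifics, do_hres() converts the delta between the freshly read cycle count and gtod->cycle_last into nanoseconds with the clocksource's mult/shift pair and adds it to the per-clock base. The standalone sketch below reproduces only that arithmetic; all values are made up for illustration (a hypothetical 10 MHz clocksource, so mult >> shift is 100 ns per cycle), and in the kernel mult, shift, cycle_last and the base come from update_vsyscall().

/*
 * Standalone sketch of the cycles-to-time conversion done by do_hres().
 * Every constant below is invented for the example; this is not kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint32_t mult  = 838860800;		/* 100 << 23 */
	uint32_t shift = 23;
	uint64_t cycle_last = 1000000;		/* snapshot from the last update */
	uint64_t cycles     = 1500000;		/* value just read from the counter */
	uint64_t base_sec   = 1700000000ULL;	/* base->sec */
	uint64_t base_nsec  = 123456ULL << shift; /* base->nsec is stored shifted */

	uint64_t ns = base_nsec;
	if (cycles > cycle_last)		/* mirrors the check added by this series */
		ns += (cycles - cycle_last) * mult;
	ns >>= shift;

	uint64_t sec = base_sec + ns / NSEC_PER_SEC; /* __iter_div_u64_rem() stand-in */
	ns %= NSEC_PER_SEC;

	printf("ts = %llu.%09llu\n",
	       (unsigned long long)sec, (unsigned long long)ns);
	return 0;
}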

arch/x86/entry/vsyscall/vsyscall_gtod.c

Lines changed: 28 additions & 23 deletions

@@ -31,6 +31,8 @@ void update_vsyscall(struct timekeeper *tk)
 {
 	int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
 	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
+	struct vgtod_ts *base;
+	u64 nsec;
 
 	/* Mark the new vclock used. */
 	BUILD_BUG_ON(VCLOCK_MAX >= 32);
@@ -45,34 +47,37 @@ void update_vsyscall(struct timekeeper *tk)
 	vdata->mult	= tk->tkr_mono.mult;
 	vdata->shift	= tk->tkr_mono.shift;
 
-	vdata->wall_time_sec		= tk->xtime_sec;
-	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
+	base = &vdata->basetime[CLOCK_REALTIME];
+	base->sec = tk->xtime_sec;
+	base->nsec = tk->tkr_mono.xtime_nsec;
 
-	vdata->monotonic_time_sec	= tk->xtime_sec
-					+ tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_snsec	= tk->tkr_mono.xtime_nsec
-					+ ((u64)tk->wall_to_monotonic.tv_nsec
-						<< tk->tkr_mono.shift);
-	while (vdata->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
-		vdata->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
-		vdata->monotonic_time_sec++;
-	}
+	base = &vdata->basetime[CLOCK_TAI];
+	base->sec = tk->xtime_sec + (s64)tk->tai_offset;
+	base->nsec = tk->tkr_mono.xtime_nsec;
 
-	vdata->wall_time_coarse_sec	= tk->xtime_sec;
-	vdata->wall_time_coarse_nsec	= (long)(tk->tkr_mono.xtime_nsec >>
-						 tk->tkr_mono.shift);
+	base = &vdata->basetime[CLOCK_MONOTONIC];
+	base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+	nsec = tk->tkr_mono.xtime_nsec;
+	nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+	while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+		nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
+		base->sec++;
+	}
+	base->nsec = nsec;
 
-	vdata->monotonic_time_coarse_sec =
-		vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_coarse_nsec =
-		vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+	base = &vdata->basetime[CLOCK_REALTIME_COARSE];
+	base->sec = tk->xtime_sec;
+	base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
-	while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
-		vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
-		vdata->monotonic_time_coarse_sec++;
+	base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
+	base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+	nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+	nsec += tk->wall_to_monotonic.tv_nsec;
+	while (nsec >= NSEC_PER_SEC) {
+		nsec -= NSEC_PER_SEC;
+		base->sec++;
 	}
+	base->nsec = nsec;
 
 	gtod_write_end(vdata);
 }
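update_vsyscall() now publishes one vgtod_ts entry per clock id instead of the old wall/monotonic/coarse scalar fields. The sketch below is a simplified, assumption-laden model of that bookkeeping (the struct names, the NR_BASES size and all sample values are invented for illustration; the real definitions and the VGTOD_* masks live in arch/x86/include/asm/vgtod.h). It only shows how the per-clock bases are filled and how the shifted nanoseconds are normalized for CLOCK_MONOTONIC.

/*
 * Illustrative model of the per-clockid base array. Not kernel code:
 * names and sizes are hypothetical, arithmetic mirrors update_vsyscall().
 */
#include <stdio.h>
#include <stdint.h>

enum { CLK_REALTIME = 0, CLK_MONOTONIC = 1, CLK_TAI = 11, NR_BASES = 12 };

struct vgtod_ts {
	uint64_t sec;
	uint64_t nsec;	/* left-shifted by the clocksource shift for hres clocks */
};

struct gtod_model {
	uint32_t shift;
	struct vgtod_ts basetime[NR_BASES];
};

static void fill_bases(struct gtod_model *g,
		       uint64_t xtime_sec, uint64_t xtime_nsec_shifted,
		       int64_t tai_offset, int64_t wtom_sec, uint64_t wtom_nsec)
{
	const uint64_t nsec_per_sec_shifted = 1000000000ULL << g->shift;
	struct vgtod_ts *base;
	uint64_t nsec;

	base = &g->basetime[CLK_REALTIME];
	base->sec  = xtime_sec;
	base->nsec = xtime_nsec_shifted;

	/* CLOCK_TAI shares the realtime nanoseconds; only the seconds differ. */
	base = &g->basetime[CLK_TAI];
	base->sec  = (uint64_t)((int64_t)xtime_sec + tai_offset);
	base->nsec = xtime_nsec_shifted;

	base = &g->basetime[CLK_MONOTONIC];
	base->sec  = (uint64_t)((int64_t)xtime_sec + wtom_sec);
	nsec = xtime_nsec_shifted + (wtom_nsec << g->shift);
	while (nsec >= nsec_per_sec_shifted) {	/* normalize into [0, 1s) */
		nsec -= nsec_per_sec_shifted;
		base->sec++;
	}
	base->nsec = nsec;
}

int main(void)
{
	struct gtod_model g = { .shift = 23 };

	/* made-up timekeeper snapshot: wall time, TAI offset, wall_to_monotonic */
	fill_bases(&g, 1700000000ULL, 900000000ULL << 23, 37, -1000, 250000000ULL);

	printf("CLOCK_MONOTONIC base: %llu s + %llu shifted-ns\n",
	       (unsigned long long)g.basetime[CLK_MONOTONIC].sec,
	       (unsigned long long)g.basetime[CLK_MONOTONIC].nsec);
	return 0;
}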
