Skip to content

Commit 99306df

Browse files
committed
Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 timer updates from Thomas Gleixner:

"These updates are related to TSC handling:

 - Support platforms which have synchronized TSCs but the boot CPU has a non-zero TSC_ADJUST value, which is considered a firmware bug on normal systems. This applies to HPE/SGI UV platforms where the platform firmware uses TSC_ADJUST to ensure TSC synchronization across a huge number of sockets, but due to power-on timings the boot CPU cannot be guaranteed to have a zero TSC_ADJUST register value.

 - Fix the ordering of udelay calibration and kvmclock_init()

 - Clean up the udelay and calibration code"

* 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Mark cyc2ns_init() and detect_art() __init
  x86/platform/UV: Mark tsc_check_sync as an init function
  x86/tsc: Make CONFIG_X86_TSC=n build work again
  x86/platform/UV: Add check of TSC state set by UV BIOS
  x86/tsc: Provide a means to disable TSC ART
  x86/tsc: Drastically reduce the number of firmware bug warnings
  x86/tsc: Skip TSC test and error messages if already unstable
  x86/tsc: Add option that TSC on Socket 0 being non-zero is valid
  x86/timers: Move simple_udelay_calibration() past kvmclock_init()
  x86/timers: Make recalibrate_cpu_khz() void
  x86/timers: Move the simple udelay calibration to tsc.h
2 parents 3643b7e + 120fc3f commit 99306df

File tree

7 files changed

+148
-51
lines changed

7 files changed

+148
-51
lines changed

arch/x86/include/asm/timer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#define TICK_SIZE (tick_nsec / 1000)
1010

1111
unsigned long long native_sched_clock(void);
12-
extern int recalibrate_cpu_khz(void);
12+
extern void recalibrate_cpu_khz(void);
1313

1414
extern int no_timer_check;
1515

arch/x86/include/asm/tsc.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,22 @@ static inline cycles_t get_cycles(void)
3232

3333
extern struct system_counterval_t convert_art_to_tsc(u64 art);
3434

35+
extern void tsc_early_delay_calibrate(void);
3536
extern void tsc_init(void);
3637
extern void mark_tsc_unstable(char *reason);
3738
extern int unsynchronized_tsc(void);
3839
extern int check_tsc_unstable(void);
40+
extern void mark_tsc_async_resets(char *reason);
3941
extern unsigned long native_calibrate_cpu(void);
4042
extern unsigned long native_calibrate_tsc(void);
4143
extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
4244

4345
extern int tsc_clocksource_reliable;
46+
#ifdef CONFIG_X86_TSC
47+
extern bool tsc_async_resets;
48+
#else
49+
# define tsc_async_resets false
50+
#endif
4451

4552
/*
4653
* Boot-time check whether the TSCs are synchronized across

arch/x86/include/asm/uv/uv_hub.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -776,23 +776,36 @@ static inline int uv_num_possible_blades(void)
776776
extern void uv_nmi_setup(void);
777777
extern void uv_nmi_setup_hubless(void);
778778

779+
/* BIOS/Kernel flags exchange MMR */
780+
#define UVH_BIOS_KERNEL_MMR UVH_SCRATCH5
781+
#define UVH_BIOS_KERNEL_MMR_ALIAS UVH_SCRATCH5_ALIAS
782+
#define UVH_BIOS_KERNEL_MMR_ALIAS_2 UVH_SCRATCH5_ALIAS_2
783+
784+
/* TSC sync valid, set by BIOS */
785+
#define UVH_TSC_SYNC_MMR UVH_BIOS_KERNEL_MMR
786+
#define UVH_TSC_SYNC_SHIFT 10
787+
#define UVH_TSC_SYNC_SHIFT_UV2K 16 /* UV2/3k have different bits */
788+
#define UVH_TSC_SYNC_MASK 3 /* 0011 */
789+
#define UVH_TSC_SYNC_VALID 3 /* 0011 */
790+
#define UVH_TSC_SYNC_INVALID 2 /* 0010 */
791+
779792
/* BMC sets a bit in this MMR to non-zero before sending an NMI */
780-
#define UVH_NMI_MMR UVH_SCRATCH5
781-
#define UVH_NMI_MMR_CLEAR UVH_SCRATCH5_ALIAS
793+
#define UVH_NMI_MMR UVH_BIOS_KERNEL_MMR
794+
#define UVH_NMI_MMR_CLEAR UVH_BIOS_KERNEL_MMR_ALIAS
782795
#define UVH_NMI_MMR_SHIFT 63
783-
#define UVH_NMI_MMR_TYPE "SCRATCH5"
796+
#define UVH_NMI_MMR_TYPE "SCRATCH5"
784797

785798
/* Newer SMM NMI handler, not present in all systems */
786799
#define UVH_NMI_MMRX UVH_EVENT_OCCURRED0
787800
#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS
788801
#define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
789-
#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
802+
#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
790803

791804
/* Non-zero indicates newer SMM NMI handler present */
792805
#define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST
793806

794807
/* Indicates to BIOS that we want to use the newer SMM NMI handler */
795-
#define UVH_NMI_MMRX_REQ UVH_SCRATCH5_ALIAS_2
808+
#define UVH_NMI_MMRX_REQ UVH_BIOS_KERNEL_MMR_ALIAS_2
796809
#define UVH_NMI_MMRX_REQ_SHIFT 62
797810

798811
struct uv_hub_nmi_s {

arch/x86/kernel/apic/x2apic_uv_x.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,48 @@ static int __init early_get_pnodeid(void)
154154
return pnode;
155155
}
156156

157+
static void __init uv_tsc_check_sync(void)
158+
{
159+
u64 mmr;
160+
int sync_state;
161+
int mmr_shift;
162+
char *state;
163+
bool valid;
164+
165+
/* Accommodate different UV arch BIOSes */
166+
mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
167+
mmr_shift =
168+
is_uv1_hub() ? 0 :
169+
is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
170+
if (mmr_shift)
171+
sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK;
172+
else
173+
sync_state = 0;
174+
175+
switch (sync_state) {
176+
case UVH_TSC_SYNC_VALID:
177+
state = "in sync";
178+
valid = true;
179+
break;
180+
181+
case UVH_TSC_SYNC_INVALID:
182+
state = "unstable";
183+
valid = false;
184+
break;
185+
default:
186+
state = "unknown: assuming valid";
187+
valid = true;
188+
break;
189+
}
190+
pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state);
191+
192+
/* Mark flag that says TSC != 0 is valid for socket 0 */
193+
if (valid)
194+
mark_tsc_async_resets("UV BIOS");
195+
else
196+
mark_tsc_unstable("UV BIOS");
197+
}
198+
157199
/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
158200

159201
#define SMT_LEVEL 0 /* Leaf 0xb SMT level */
@@ -288,6 +330,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
288330
}
289331

290332
pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic);
333+
uv_tsc_check_sync();
291334

292335
return uv_apic;
293336

arch/x86/kernel/setup.c

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -812,26 +812,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
812812
return 0;
813813
}
814814

815-
static void __init simple_udelay_calibration(void)
816-
{
817-
unsigned int tsc_khz, cpu_khz;
818-
unsigned long lpj;
819-
820-
if (!boot_cpu_has(X86_FEATURE_TSC))
821-
return;
822-
823-
cpu_khz = x86_platform.calibrate_cpu();
824-
tsc_khz = x86_platform.calibrate_tsc();
825-
826-
tsc_khz = tsc_khz ? : cpu_khz;
827-
if (!tsc_khz)
828-
return;
829-
830-
lpj = tsc_khz * 1000;
831-
do_div(lpj, HZ);
832-
loops_per_jiffy = lpj;
833-
}
834-
835815
/*
836816
* Determine if we were loaded by an EFI loader. If so, then we have also been
837817
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -1039,8 +1019,6 @@ void __init setup_arch(char **cmdline_p)
10391019
*/
10401020
init_hypervisor_platform();
10411021

1042-
simple_udelay_calibration();
1043-
10441022
x86_init.resources.probe_roms();
10451023

10461024
/* after parse_early_param, so could debug it */
@@ -1125,9 +1103,6 @@ void __init setup_arch(char **cmdline_p)
11251103
memblock_set_current_limit(ISA_END_ADDRESS);
11261104
e820__memblock_setup();
11271105

1128-
if (!early_xdbc_setup_hardware())
1129-
early_xdbc_register_console();
1130-
11311106
reserve_bios_regions();
11321107

11331108
if (efi_enabled(EFI_MEMMAP)) {
@@ -1233,6 +1208,10 @@ void __init setup_arch(char **cmdline_p)
12331208
kvmclock_init();
12341209
#endif
12351210

1211+
tsc_early_delay_calibrate();
1212+
if (!early_xdbc_setup_hardware())
1213+
early_xdbc_register_console();
1214+
12361215
x86_init.paging.pagetable_init();
12371216

12381217
kasan_init();

arch/x86/kernel/tsc.c

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ static void cyc2ns_data_init(struct cyc2ns_data *data)
112112
data->cyc2ns_offset = 0;
113113
}
114114

115-
static void cyc2ns_init(int cpu)
115+
static void __init cyc2ns_init(int cpu)
116116
{
117117
struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
118118

@@ -812,13 +812,13 @@ unsigned long native_calibrate_cpu(void)
812812
return tsc_pit_min;
813813
}
814814

815-
int recalibrate_cpu_khz(void)
815+
void recalibrate_cpu_khz(void)
816816
{
817817
#ifndef CONFIG_SMP
818818
unsigned long cpu_khz_old = cpu_khz;
819819

820820
if (!boot_cpu_has(X86_FEATURE_TSC))
821-
return -ENODEV;
821+
return;
822822

823823
cpu_khz = x86_platform.calibrate_cpu();
824824
tsc_khz = x86_platform.calibrate_tsc();
@@ -828,10 +828,6 @@ int recalibrate_cpu_khz(void)
828828
cpu_khz = tsc_khz;
829829
cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
830830
cpu_khz_old, cpu_khz);
831-
832-
return 0;
833-
#else
834-
return -ENODEV;
835831
#endif
836832
}
837833

@@ -959,17 +955,21 @@ core_initcall(cpufreq_register_tsc_scaling);
959955
/*
960956
* If ART is present detect the numerator:denominator to convert to TSC
961957
*/
962-
static void detect_art(void)
958+
static void __init detect_art(void)
963959
{
964960
unsigned int unused[2];
965961

966962
if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
967963
return;
968964

969-
/* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
965+
/*
966+
* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required,
967+
* and the TSC counter resets must not occur asynchronously.
968+
*/
970969
if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
971970
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
972-
!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
971+
!boot_cpu_has(X86_FEATURE_TSC_ADJUST) ||
972+
tsc_async_resets)
973973
return;
974974

975975
cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
@@ -1263,6 +1263,25 @@ static int __init init_tsc_clocksource(void)
12631263
*/
12641264
device_initcall(init_tsc_clocksource);
12651265

1266+
void __init tsc_early_delay_calibrate(void)
1267+
{
1268+
unsigned long lpj;
1269+
1270+
if (!boot_cpu_has(X86_FEATURE_TSC))
1271+
return;
1272+
1273+
cpu_khz = x86_platform.calibrate_cpu();
1274+
tsc_khz = x86_platform.calibrate_tsc();
1275+
1276+
tsc_khz = tsc_khz ? : cpu_khz;
1277+
if (!tsc_khz)
1278+
return;
1279+
1280+
lpj = tsc_khz * 1000;
1281+
do_div(lpj, HZ);
1282+
loops_per_jiffy = lpj;
1283+
}
1284+
12661285
void __init tsc_init(void)
12671286
{
12681287
u64 lpj, cyc;

arch/x86/kernel/tsc_sync.c

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@ struct tsc_adjust {
3131

3232
static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
3333

34+
/*
35+
* TSCs on different sockets may be reset asynchronously.
36+
* This may cause the TSC ADJUST value on socket 0 to be NOT 0.
37+
*/
38+
bool __read_mostly tsc_async_resets;
39+
40+
void mark_tsc_async_resets(char *reason)
41+
{
42+
if (tsc_async_resets)
43+
return;
44+
tsc_async_resets = true;
45+
pr_info("tsc: Marking TSC async resets true due to %s\n", reason);
46+
}
47+
3448
void tsc_verify_tsc_adjust(bool resume)
3549
{
3650
struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust);
@@ -39,6 +53,10 @@ void tsc_verify_tsc_adjust(bool resume)
3953
if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
4054
return;
4155

56+
/* Skip unnecessary error messages if TSC already unstable */
57+
if (check_tsc_unstable())
58+
return;
59+
4260
/* Rate limit the MSR check */
4361
if (!resume && time_before(jiffies, adj->nextcheck))
4462
return;
@@ -72,12 +90,22 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
7290
* non zero. We don't do that on non boot cpus because physical
7391
* hotplug should have set the ADJUST register to a value > 0 so
7492
* the TSC is in sync with the already running cpus.
93+
*
94+
* Also don't force the ADJUST value to zero if that is a valid value
95+
* for socket 0 as determined by the system arch. This is required
96+
* when multiple sockets are reset asynchronously with each other
97+
* and socket 0 may not have a TSC ADJUST value of 0.
7598
*/
7699
if (bootcpu && bootval != 0) {
77-
pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu,
78-
bootval);
79-
wrmsrl(MSR_IA32_TSC_ADJUST, 0);
80-
bootval = 0;
100+
if (likely(!tsc_async_resets)) {
101+
pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n",
102+
cpu, bootval);
103+
wrmsrl(MSR_IA32_TSC_ADJUST, 0);
104+
bootval = 0;
105+
} else {
106+
pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n",
107+
cpu, bootval);
108+
}
81109
}
82110
cur->adjusted = bootval;
83111
}
@@ -91,6 +119,10 @@ bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
91119
if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
92120
return false;
93121

122+
/* Skip unnecessary error messages if TSC already unstable */
123+
if (check_tsc_unstable())
124+
return false;
125+
94126
rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
95127
cur->bootval = bootval;
96128
cur->nextcheck = jiffies + HZ;
@@ -118,6 +150,13 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
118150
cur->nextcheck = jiffies + HZ;
119151
cur->warned = false;
120152

153+
/*
154+
* If a non-zero TSC ADJUST value for socket 0 may be valid then the default
155+
* adjusted value cannot be assumed to be zero either.
156+
*/
157+
if (tsc_async_resets)
158+
cur->adjusted = bootval;
159+
121160
/*
122161
* Check whether this CPU is the first in a package to come up. In
123162
* this case do not check the boot value against another package
@@ -139,19 +178,16 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
139178
* Compare the boot value and complain if it differs in the
140179
* package.
141180
*/
142-
if (bootval != ref->bootval) {
143-
pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n",
144-
refcpu, ref->bootval, cpu, bootval);
145-
}
181+
if (bootval != ref->bootval)
182+
printk_once(FW_BUG "TSC ADJUST differs within socket(s), fixing all errors\n");
183+
146184
/*
147185
* The TSC_ADJUST values in a package must be the same. If the boot
148186
* value on this newly upcoming CPU differs from the adjustment
149187
* value of the already online CPU in this package, set it to that
150188
* adjusted value.
151189
*/
152190
if (bootval != ref->adjusted) {
153-
pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n",
154-
refcpu, ref->adjusted, cpu, bootval);
155191
cur->adjusted = ref->adjusted;
156192
wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
157193
}

0 commit comments

Comments
 (0)