Skip to content

Commit 0cc3cd2

Browse files
committed
cpu/hotplug: Boot HT siblings at least once
Due to the way Machine Check Exceptions work on X86 hyperthreads it's required to boot up _all_ logical cores at least once in order to set the CR4.MCE bit. So instead of ignoring the sibling threads right away, let them boot up once so they can configure themselves. After they came out of the initial boot stage check whether it's a "secondary" sibling and cancel the operation which puts the CPU back into offline state. Reported-by: Dave Hansen <dave.hansen@intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tony Luck <tony.luck@intel.com>
1 parent 506a66f commit 0cc3cd2

File tree

1 file changed

+48
-24
lines changed

1 file changed

+48
-24
lines changed

kernel/cpu.c

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct cpuhp_cpu_state {
6060
bool rollback;
6161
bool single;
6262
bool bringup;
63+
bool booted_once;
6364
struct hlist_node *node;
6465
struct hlist_node *last;
6566
enum cpuhp_state cb_state;
@@ -342,6 +343,40 @@ void cpu_hotplug_enable(void)
342343
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
343344
#endif /* CONFIG_HOTPLUG_CPU */
344345

346+
#ifdef CONFIG_HOTPLUG_SMT
347+
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
348+
349+
static int __init smt_cmdline_disable(char *str)
350+
{
351+
cpu_smt_control = CPU_SMT_DISABLED;
352+
if (str && !strcmp(str, "force")) {
353+
pr_info("SMT: Force disabled\n");
354+
cpu_smt_control = CPU_SMT_FORCE_DISABLED;
355+
}
356+
return 0;
357+
}
358+
early_param("nosmt", smt_cmdline_disable);
359+
360+
static inline bool cpu_smt_allowed(unsigned int cpu)
361+
{
362+
if (cpu_smt_control == CPU_SMT_ENABLED)
363+
return true;
364+
365+
if (topology_is_primary_thread(cpu))
366+
return true;
367+
368+
/*
369+
* On x86 it's required to boot all logical CPUs at least once so
370+
* that the init code can get a chance to set CR4.MCE on each
371+
* CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
372+
* core will shut down the machine.
373+
*/
374+
return !per_cpu(cpuhp_state, cpu).booted_once;
375+
}
376+
#else
377+
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
378+
#endif
379+
345380
static inline enum cpuhp_state
346381
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
347382
{
@@ -422,6 +457,16 @@ static int bringup_wait_for_ap(unsigned int cpu)
422457
stop_machine_unpark(cpu);
423458
kthread_unpark(st->thread);
424459

460+
/*
461+
* SMT soft disabling on X86 requires bringing the CPU out of the
462+
* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
463+
* CPU marked itself as booted_once in notify_cpu_starting() so the
464+
* cpu_smt_allowed() check will now return false if this is not the
465+
* primary sibling.
466+
*/
467+
if (!cpu_smt_allowed(cpu))
468+
return -ECANCELED;
469+
425470
if (st->target <= CPUHP_AP_ONLINE_IDLE)
426471
return 0;
427472

@@ -933,29 +978,6 @@ EXPORT_SYMBOL(cpu_down);
933978
#define takedown_cpu NULL
934979
#endif /*CONFIG_HOTPLUG_CPU*/
935980

936-
#ifdef CONFIG_HOTPLUG_SMT
937-
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
938-
939-
static int __init smt_cmdline_disable(char *str)
940-
{
941-
cpu_smt_control = CPU_SMT_DISABLED;
942-
if (str && !strcmp(str, "force")) {
943-
pr_info("SMT: Force disabled\n");
944-
cpu_smt_control = CPU_SMT_FORCE_DISABLED;
945-
}
946-
return 0;
947-
}
948-
early_param("nosmt", smt_cmdline_disable);
949-
950-
static inline bool cpu_smt_allowed(unsigned int cpu)
951-
{
952-
return cpu_smt_control == CPU_SMT_ENABLED ||
953-
topology_is_primary_thread(cpu);
954-
}
955-
#else
956-
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
957-
#endif
958-
959981
/**
960982
* notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
961983
* @cpu: cpu that just started
@@ -970,6 +992,7 @@ void notify_cpu_starting(unsigned int cpu)
970992
int ret;
971993

972994
rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
995+
st->booted_once = true;
973996
while (st->state < target) {
974997
st->state++;
975998
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
@@ -2180,5 +2203,6 @@ void __init boot_cpu_init(void)
21802203
*/
21812204
void __init boot_cpu_state_init(void)
21822205
{
2183-
per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
2206+
this_cpu_write(cpuhp_state.booted_once, true);
2207+
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
21842208
}

0 commit comments

Comments
 (0)