Skip to content

Commit 0672453

Browse files
author
Borislav Petkov
committed
amd64_edac: check NB MCE bank enable on the current node properly
The old code was using smp_call_function_many(), which skips the current cpu if it is in the supplied cpumask. Switch to the rdmsr_on_cpus() interface, which takes care of that. In addition, add the get_cpus_on_this_dct_cpumask() helper, which computes a cpumask of all the cores on a node and thus on a DCT.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
1 parent 57a3085 commit 0672453

File tree

1 file changed

+45
-21
lines changed

1 file changed

+45
-21
lines changed

drivers/edac/amd64_edac.c

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2741,30 +2741,53 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
27412741
wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
27422742
}
27432743

2744-
static void check_mcg_ctl(void *ret)
2744+
/* get all cores on this DCT */
2745+
static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid)
27452746
{
2746-
u64 msr_val = 0;
2747-
u8 nbe;
2747+
int cpu;
27482748

2749-
rdmsrl(MSR_IA32_MCG_CTL, msr_val);
2750-
nbe = msr_val & K8_MSR_MCGCTL_NBE;
2751-
2752-
debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2753-
raw_smp_processor_id(), msr_val,
2754-
(nbe ? "enabled" : "disabled"));
2755-
2756-
if (!nbe)
2757-
*(int *)ret = 0;
2749+
for_each_online_cpu(cpu)
2750+
if (amd_get_nb_id(cpu) == nid)
2751+
cpumask_set_cpu(cpu, mask);
27582752
}
27592753

27602754
/* check MCG_CTL on all the cpus on this node */
2761-
static int mcg_ctl_enabled_on_node(const struct cpumask *mask)
2755+
static bool amd64_nb_mce_bank_enabled_on_node(int nid)
27622756
{
2763-
int ret = 1;
2764-
preempt_disable();
2765-
smp_call_function_many(mask, check_mcg_ctl, &ret, 1);
2766-
preempt_enable();
2757+
cpumask_t mask;
2758+
struct msr *msrs;
2759+
int cpu, nbe, idx = 0;
2760+
bool ret = false;
2761+
2762+
cpumask_clear(&mask);
2763+
2764+
get_cpus_on_this_dct_cpumask(&mask, nid);
2765+
2766+
msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL);
2767+
if (!msrs) {
2768+
amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
2769+
__func__);
2770+
return false;
2771+
}
2772+
2773+
rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs);
2774+
2775+
for_each_cpu(cpu, &mask) {
2776+
nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE;
2777+
2778+
debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2779+
cpu, msrs[idx].q,
2780+
(nbe ? "enabled" : "disabled"));
2781+
2782+
if (!nbe)
2783+
goto out;
2784+
2785+
idx++;
2786+
}
2787+
ret = true;
27672788

2789+
out:
2790+
kfree(msrs);
27682791
return ret;
27692792
}
27702793

@@ -2783,7 +2806,8 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
27832806
{
27842807
u32 value;
27852808
int err = 0;
2786-
u8 ecc_enabled = 0, mcg_ctl_en = 0;
2809+
u8 ecc_enabled = 0;
2810+
bool nb_mce_en = false;
27872811

27882812
err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
27892813
if (err)
@@ -2797,13 +2821,13 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
27972821
else
27982822
amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
27992823

2800-
mcg_ctl_en = mcg_ctl_enabled_on_node(cpumask_of_node(pvt->mc_node_id));
2801-
if (!mcg_ctl_en)
2824+
nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
2825+
if (!nb_mce_en)
28022826
amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
28032827
"0x%08x[4] on node %d to enable.\n",
28042828
MSR_IA32_MCG_CTL, pvt->mc_node_id);
28052829

2806-
if (!ecc_enabled || !mcg_ctl_en) {
2830+
if (!ecc_enabled || !nb_mce_en) {
28072831
if (!ecc_enable_override) {
28082832
amd64_printk(KERN_WARNING, "%s", ecc_warning);
28092833
return -ENODEV;

0 commit comments

Comments
 (0)