Skip to content

Commit de55a89

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  amd64_edac: check NB MCE bank enable on the current node properly
  amd64_edac: Rewrite unganged mode code of f10_early_channel_count
  amd64_edac: cleanup amd64_check_ecc_enabled
  x86, EDAC: Provide function to return NodeId of a CPU
  amd64_edac: build driver only on AMD hardware
2 parents 79b520e + 0672453 commit de55a89

File tree

5 files changed

+88
-103
lines changed

5 files changed

+88
-103
lines changed

arch/x86/include/asm/processor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,4 +1020,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
10201020
extern int get_tsc_mode(unsigned long adr);
10211021
extern int set_tsc_mode(unsigned int val);
10221022

1023+
extern int amd_get_nb_id(int cpu);
1024+
10231025
#endif /* _ASM_X86_PROCESSOR_H */

arch/x86/kernel/cpu/amd.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
333333
#endif
334334
}
335335

336+
int amd_get_nb_id(int cpu)
337+
{
338+
int id = 0;
339+
#ifdef CONFIG_SMP
340+
id = per_cpu(cpu_llc_id, cpu);
341+
#endif
342+
return id;
343+
}
344+
EXPORT_SYMBOL_GPL(amd_get_nb_id);
345+
336346
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
337347
{
338348
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)

drivers/edac/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ config EDAC_MM_EDAC
5959

6060
config EDAC_AMD64
6161
tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
62-
depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI
62+
depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD
6363
help
6464
Support for error detection and correction on the AMD 64
6565
Families of Memory Controllers (K8, F10h and F11h)

drivers/edac/amd64_edac.c

Lines changed: 74 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,7 +1255,9 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
12551255
*/
12561256
static int f10_early_channel_count(struct amd64_pvt *pvt)
12571257
{
1258+
int dbams[] = { DBAM0, DBAM1 };
12581259
int err = 0, channels = 0;
1260+
int i, j;
12591261
u32 dbam;
12601262

12611263
err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
@@ -1288,46 +1290,19 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
12881290
* is more than just one DIMM present in unganged mode. Need to check
12891291
* both controllers since DIMMs can be placed in either one.
12901292
*/
1291-
channels = 0;
1292-
err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM0, &dbam);
1293-
if (err)
1294-
goto err_reg;
1295-
1296-
if (DBAM_DIMM(0, dbam) > 0)
1297-
channels++;
1298-
if (DBAM_DIMM(1, dbam) > 0)
1299-
channels++;
1300-
if (DBAM_DIMM(2, dbam) > 0)
1301-
channels++;
1302-
if (DBAM_DIMM(3, dbam) > 0)
1303-
channels++;
1304-
1305-
/* If more than 2 DIMMs are present, then we have 2 channels */
1306-
if (channels > 2)
1307-
channels = 2;
1308-
else if (channels == 0) {
1309-
/* No DIMMs on DCT0, so look at DCT1 */
1310-
err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM1, &dbam);
1293+
for (i = 0; i < ARRAY_SIZE(dbams); i++) {
1294+
err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam);
13111295
if (err)
13121296
goto err_reg;
13131297

1314-
if (DBAM_DIMM(0, dbam) > 0)
1315-
channels++;
1316-
if (DBAM_DIMM(1, dbam) > 0)
1317-
channels++;
1318-
if (DBAM_DIMM(2, dbam) > 0)
1319-
channels++;
1320-
if (DBAM_DIMM(3, dbam) > 0)
1321-
channels++;
1322-
1323-
if (channels > 2)
1324-
channels = 2;
1298+
for (j = 0; j < 4; j++) {
1299+
if (DBAM_DIMM(j, dbam) > 0) {
1300+
channels++;
1301+
break;
1302+
}
1303+
}
13251304
}
13261305

1327-
/* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */
1328-
if (channels == 0)
1329-
channels = 1;
1330-
13311306
debugf0("MCT channel count: %d\n", channels);
13321307

13331308
return channels;
@@ -2766,30 +2741,53 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
27662741
wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
27672742
}
27682743

2769-
static void check_mcg_ctl(void *ret)
2744+
/* get all cores on this DCT */
2745+
static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid)
27702746
{
2771-
u64 msr_val = 0;
2772-
u8 nbe;
2773-
2774-
rdmsrl(MSR_IA32_MCG_CTL, msr_val);
2775-
nbe = msr_val & K8_MSR_MCGCTL_NBE;
2776-
2777-
debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2778-
raw_smp_processor_id(), msr_val,
2779-
(nbe ? "enabled" : "disabled"));
2747+
int cpu;
27802748

2781-
if (!nbe)
2782-
*(int *)ret = 0;
2749+
for_each_online_cpu(cpu)
2750+
if (amd_get_nb_id(cpu) == nid)
2751+
cpumask_set_cpu(cpu, mask);
27832752
}
27842753

27852754
/* check MCG_CTL on all the cpus on this node */
2786-
static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
2755+
static bool amd64_nb_mce_bank_enabled_on_node(int nid)
27872756
{
2788-
int ret = 1;
2789-
preempt_disable();
2790-
smp_call_function_many(mask, check_mcg_ctl, &ret, 1);
2791-
preempt_enable();
2757+
cpumask_t mask;
2758+
struct msr *msrs;
2759+
int cpu, nbe, idx = 0;
2760+
bool ret = false;
27922761

2762+
cpumask_clear(&mask);
2763+
2764+
get_cpus_on_this_dct_cpumask(&mask, nid);
2765+
2766+
msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL);
2767+
if (!msrs) {
2768+
amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
2769+
__func__);
2770+
return false;
2771+
}
2772+
2773+
rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs);
2774+
2775+
for_each_cpu(cpu, &mask) {
2776+
nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE;
2777+
2778+
debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2779+
cpu, msrs[idx].q,
2780+
(nbe ? "enabled" : "disabled"));
2781+
2782+
if (!nbe)
2783+
goto out;
2784+
2785+
idx++;
2786+
}
2787+
ret = true;
2788+
2789+
out:
2790+
kfree(msrs);
27932791
return ret;
27942792
}
27952793

@@ -2799,71 +2797,46 @@ static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
27992797
* the memory system completely. A command line option allows to force-enable
28002798
* hardware ECC later in amd64_enable_ecc_error_reporting().
28012799
*/
2800+
static const char *ecc_warning =
2801+
"WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
2802+
" Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
2803+
" Also, use of the override can cause unknown side effects.\n";
2804+
28022805
static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
28032806
{
28042807
u32 value;
2805-
int err = 0, ret = 0;
2808+
int err = 0;
28062809
u8 ecc_enabled = 0;
2810+
bool nb_mce_en = false;
28072811

28082812
err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
28092813
if (err)
28102814
debugf0("Reading K8_NBCTL failed\n");
28112815

28122816
ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
2817+
if (!ecc_enabled)
2818+
amd64_printk(KERN_WARNING, "This node reports that Memory ECC "
2819+
"is currently disabled, set F3x%x[22] (%s).\n",
2820+
K8_NBCFG, pci_name(pvt->misc_f3_ctl));
2821+
else
2822+
amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
28132823

2814-
ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id));
2815-
2816-
debugf0("K8_NBCFG=0x%x, DRAM ECC is %s\n", value,
2817-
(value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled"));
2818-
2819-
if (!ecc_enabled || !ret) {
2820-
if (!ecc_enabled) {
2821-
amd64_printk(KERN_WARNING, "This node reports that "
2822-
"Memory ECC is currently "
2823-
"disabled.\n");
2824+
nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
2825+
if (!nb_mce_en)
2826+
amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
2827+
"0x%08x[4] on node %d to enable.\n",
2828+
MSR_IA32_MCG_CTL, pvt->mc_node_id);
28242829

2825-
amd64_printk(KERN_WARNING, "bit 0x%lx in register "
2826-
"F3x%x of the MISC_CONTROL device (%s) "
2827-
"should be enabled\n", K8_NBCFG_ECC_ENABLE,
2828-
K8_NBCFG, pci_name(pvt->misc_f3_ctl));
2829-
}
2830-
if (!ret) {
2831-
amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
2832-
"of node %d should be enabled\n",
2833-
K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
2834-
pvt->mc_node_id);
2835-
}
2830+
if (!ecc_enabled || !nb_mce_en) {
28362831
if (!ecc_enable_override) {
2837-
amd64_printk(KERN_WARNING, "WARNING: ECC is NOT "
2838-
"currently enabled by the BIOS. Module "
2839-
"will NOT be loaded.\n"
2840-
" Either Enable ECC in the BIOS, "
2841-
"or use the 'ecc_enable_override' "
2842-
"parameter.\n"
2843-
" Might be a BIOS bug, if BIOS says "
2844-
"ECC is enabled\n"
2845-
" Use of the override can cause "
2846-
"unknown side effects.\n");
2847-
ret = -ENODEV;
2848-
} else
2849-
/*
2850-
* enable further driver loading if ECC enable is
2851-
* overridden.
2852-
*/
2853-
ret = 0;
2854-
} else {
2855-
amd64_printk(KERN_INFO,
2856-
"ECC is enabled by BIOS, Proceeding "
2857-
"with EDAC module initialization\n");
2858-
2859-
/* Signal good ECC status */
2860-
ret = 0;
2861-
2832+
amd64_printk(KERN_WARNING, "%s", ecc_warning);
2833+
return -ENODEV;
2834+
}
2835+
} else
28622836
/* CLEAR the override, since BIOS controlled it */
28632837
ecc_enable_override = 0;
2864-
}
28652838

2866-
return ret;
2839+
return 0;
28672840
}
28682841

28692842
struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +

drivers/edac/edac_mce_amd.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ void decode_mce(struct mce *m)
405405
regs.nbsh = (u32)(m->status >> 32);
406406
regs.nbeal = (u32) m->addr;
407407
regs.nbeah = (u32)(m->addr >> 32);
408-
node = per_cpu(cpu_llc_id, m->extcpu);
408+
node = amd_get_nb_id(m->extcpu);
409409

410410
amd_decode_nb_mce(node, &regs, 1);
411411
break;

0 commit comments

Comments
 (0)