@@ -1255,7 +1255,9 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
1255
1255
*/
1256
1256
static int f10_early_channel_count (struct amd64_pvt * pvt )
1257
1257
{
1258
+ int dbams [] = { DBAM0 , DBAM1 };
1258
1259
int err = 0 , channels = 0 ;
1260
+ int i , j ;
1259
1261
u32 dbam ;
1260
1262
1261
1263
err = pci_read_config_dword (pvt -> dram_f2_ctl , F10_DCLR_0 , & pvt -> dclr0 );
@@ -1288,46 +1290,19 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
1288
1290
* is more than just one DIMM present in unganged mode. Need to check
1289
1291
* both controllers since DIMMs can be placed in either one.
1290
1292
*/
1291
- channels = 0 ;
1292
- err = pci_read_config_dword (pvt -> dram_f2_ctl , DBAM0 , & dbam );
1293
- if (err )
1294
- goto err_reg ;
1295
-
1296
- if (DBAM_DIMM (0 , dbam ) > 0 )
1297
- channels ++ ;
1298
- if (DBAM_DIMM (1 , dbam ) > 0 )
1299
- channels ++ ;
1300
- if (DBAM_DIMM (2 , dbam ) > 0 )
1301
- channels ++ ;
1302
- if (DBAM_DIMM (3 , dbam ) > 0 )
1303
- channels ++ ;
1304
-
1305
- /* If more than 2 DIMMs are present, then we have 2 channels */
1306
- if (channels > 2 )
1307
- channels = 2 ;
1308
- else if (channels == 0 ) {
1309
- /* No DIMMs on DCT0, so look at DCT1 */
1310
- err = pci_read_config_dword (pvt -> dram_f2_ctl , DBAM1 , & dbam );
1293
+ for (i = 0 ; i < ARRAY_SIZE (dbams ); i ++ ) {
1294
+ err = pci_read_config_dword (pvt -> dram_f2_ctl , dbams [i ], & dbam );
1311
1295
if (err )
1312
1296
goto err_reg ;
1313
1297
1314
- if (DBAM_DIMM (0 , dbam ) > 0 )
1315
- channels ++ ;
1316
- if (DBAM_DIMM (1 , dbam ) > 0 )
1317
- channels ++ ;
1318
- if (DBAM_DIMM (2 , dbam ) > 0 )
1319
- channels ++ ;
1320
- if (DBAM_DIMM (3 , dbam ) > 0 )
1321
- channels ++ ;
1322
-
1323
- if (channels > 2 )
1324
- channels = 2 ;
1298
+ for (j = 0 ; j < 4 ; j ++ ) {
1299
+ if (DBAM_DIMM (j , dbam ) > 0 ) {
1300
+ channels ++ ;
1301
+ break ;
1302
+ }
1303
+ }
1325
1304
}
1326
1305
1327
- /* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */
1328
- if (channels == 0 )
1329
- channels = 1 ;
1330
-
1331
1306
debugf0 ("MCT channel count: %d\n" , channels );
1332
1307
1333
1308
return channels ;
@@ -2766,30 +2741,53 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
2766
2741
wrmsr_on_cpus (cpumask , K8_MSR_MCGCTL , msrs );
2767
2742
}
2768
2743
2769
- static void check_mcg_ctl (void * ret )
2744
+ /* get all cores on this DCT */
2745
+ static void get_cpus_on_this_dct_cpumask (cpumask_t * mask , int nid )
2770
2746
{
2771
- u64 msr_val = 0 ;
2772
- u8 nbe ;
2773
-
2774
- rdmsrl (MSR_IA32_MCG_CTL , msr_val );
2775
- nbe = msr_val & K8_MSR_MCGCTL_NBE ;
2776
-
2777
- debugf0 ("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n" ,
2778
- raw_smp_processor_id (), msr_val ,
2779
- (nbe ? "enabled" : "disabled" ));
2747
+ int cpu ;
2780
2748
2781
- if (!nbe )
2782
- * (int * )ret = 0 ;
2749
+ for_each_online_cpu (cpu )
2750
+ if (amd_get_nb_id (cpu ) == nid )
2751
+ cpumask_set_cpu (cpu , mask );
2783
2752
}
2784
2753
2785
2754
/* check MCG_CTL on all the cpus on this node */
2786
- static int amd64_mcg_ctl_enabled_on_cpus ( const cpumask_t * mask )
2755
+ static bool amd64_nb_mce_bank_enabled_on_node ( int nid )
2787
2756
{
2788
- int ret = 1 ;
2789
- preempt_disable () ;
2790
- smp_call_function_many ( mask , check_mcg_ctl , & ret , 1 ) ;
2791
- preempt_enable () ;
2757
+ cpumask_t mask ;
2758
+ struct msr * msrs ;
2759
+ int cpu , nbe , idx = 0 ;
2760
+ bool ret = false ;
2792
2761
2762
+ cpumask_clear (& mask );
2763
+
2764
+ get_cpus_on_this_dct_cpumask (& mask , nid );
2765
+
2766
+ msrs = kzalloc (sizeof (struct msr ) * cpumask_weight (& mask ), GFP_KERNEL );
2767
+ if (!msrs ) {
2768
+ amd64_printk (KERN_WARNING , "%s: error allocating msrs\n" ,
2769
+ __func__ );
2770
+ return false;
2771
+ }
2772
+
2773
+ rdmsr_on_cpus (& mask , MSR_IA32_MCG_CTL , msrs );
2774
+
2775
+ for_each_cpu (cpu , & mask ) {
2776
+ nbe = msrs [idx ].l & K8_MSR_MCGCTL_NBE ;
2777
+
2778
+ debugf0 ("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n" ,
2779
+ cpu , msrs [idx ].q ,
2780
+ (nbe ? "enabled" : "disabled" ));
2781
+
2782
+ if (!nbe )
2783
+ goto out ;
2784
+
2785
+ idx ++ ;
2786
+ }
2787
+ ret = true;
2788
+
2789
+ out :
2790
+ kfree (msrs );
2793
2791
return ret ;
2794
2792
}
2795
2793
@@ -2799,71 +2797,46 @@ static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
2799
2797
* the memory system completely. A command line option allows to force-enable
2800
2798
* hardware ECC later in amd64_enable_ecc_error_reporting().
2801
2799
*/
2800
+ static const char * ecc_warning =
2801
+ "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
2802
+ " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
2803
+ " Also, use of the override can cause unknown side effects.\n" ;
2804
+
2802
2805
static int amd64_check_ecc_enabled (struct amd64_pvt * pvt )
2803
2806
{
2804
2807
u32 value ;
2805
- int err = 0 , ret = 0 ;
2808
+ int err = 0 ;
2806
2809
u8 ecc_enabled = 0 ;
2810
+ bool nb_mce_en = false;
2807
2811
2808
2812
err = pci_read_config_dword (pvt -> misc_f3_ctl , K8_NBCFG , & value );
2809
2813
if (err )
2810
2814
debugf0 ("Reading K8_NBCTL failed\n" );
2811
2815
2812
2816
ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE );
2817
+ if (!ecc_enabled )
2818
+ amd64_printk (KERN_WARNING , "This node reports that Memory ECC "
2819
+ "is currently disabled, set F3x%x[22] (%s).\n" ,
2820
+ K8_NBCFG , pci_name (pvt -> misc_f3_ctl ));
2821
+ else
2822
+ amd64_printk (KERN_INFO , "ECC is enabled by BIOS.\n" );
2813
2823
2814
- ret = amd64_mcg_ctl_enabled_on_cpus (cpumask_of_node (pvt -> mc_node_id ));
2815
-
2816
- debugf0 ("K8_NBCFG=0x%x, DRAM ECC is %s\n" , value ,
2817
- (value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled" ));
2818
-
2819
- if (!ecc_enabled || !ret ) {
2820
- if (!ecc_enabled ) {
2821
- amd64_printk (KERN_WARNING , "This node reports that "
2822
- "Memory ECC is currently "
2823
- "disabled.\n" );
2824
+ nb_mce_en = amd64_nb_mce_bank_enabled_on_node (pvt -> mc_node_id );
2825
+ if (!nb_mce_en )
2826
+ amd64_printk (KERN_WARNING , "NB MCE bank disabled, set MSR "
2827
+ "0x%08x[4] on node %d to enable.\n" ,
2828
+ MSR_IA32_MCG_CTL , pvt -> mc_node_id );
2824
2829
2825
- amd64_printk (KERN_WARNING , "bit 0x%lx in register "
2826
- "F3x%x of the MISC_CONTROL device (%s) "
2827
- "should be enabled\n" , K8_NBCFG_ECC_ENABLE ,
2828
- K8_NBCFG , pci_name (pvt -> misc_f3_ctl ));
2829
- }
2830
- if (!ret ) {
2831
- amd64_printk (KERN_WARNING , "bit 0x%016lx in MSR 0x%08x "
2832
- "of node %d should be enabled\n" ,
2833
- K8_MSR_MCGCTL_NBE , MSR_IA32_MCG_CTL ,
2834
- pvt -> mc_node_id );
2835
- }
2830
+ if (!ecc_enabled || !nb_mce_en ) {
2836
2831
if (!ecc_enable_override ) {
2837
- amd64_printk (KERN_WARNING , "WARNING: ECC is NOT "
2838
- "currently enabled by the BIOS. Module "
2839
- "will NOT be loaded.\n"
2840
- " Either Enable ECC in the BIOS, "
2841
- "or use the 'ecc_enable_override' "
2842
- "parameter.\n"
2843
- " Might be a BIOS bug, if BIOS says "
2844
- "ECC is enabled\n"
2845
- " Use of the override can cause "
2846
- "unknown side effects.\n" );
2847
- ret = - ENODEV ;
2848
- } else
2849
- /*
2850
- * enable further driver loading if ECC enable is
2851
- * overridden.
2852
- */
2853
- ret = 0 ;
2854
- } else {
2855
- amd64_printk (KERN_INFO ,
2856
- "ECC is enabled by BIOS, Proceeding "
2857
- "with EDAC module initialization\n" );
2858
-
2859
- /* Signal good ECC status */
2860
- ret = 0 ;
2861
-
2832
+ amd64_printk (KERN_WARNING , "%s" , ecc_warning );
2833
+ return - ENODEV ;
2834
+ }
2835
+ } else
2862
2836
/* CLEAR the override, since BIOS controlled it */
2863
2837
ecc_enable_override = 0 ;
2864
- }
2865
2838
2866
- return ret ;
2839
+ return 0 ;
2867
2840
}
2868
2841
2869
2842
struct mcidev_sysfs_attribute sysfs_attrs [ARRAY_SIZE (amd64_dbg_attrs ) +
0 commit comments