@@ -326,6 +326,7 @@ struct sbridge_info {
326
326
const struct interleave_pkg * interleave_pkg ;
327
327
u8 max_sad ;
328
328
u8 (* get_node_id )(struct sbridge_pvt * pvt );
329
+ u8 (* get_ha )(u8 bank );
329
330
enum mem_type (* get_memory_type )(struct sbridge_pvt * pvt );
330
331
enum dev_type (* get_width )(struct sbridge_pvt * pvt , u32 mtr );
331
332
struct pci_dev * pci_vtd ;
@@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt)
1002
1003
return GET_BITFIELD (reg , 0 , 2 );
1003
1004
}
1004
1005
1006
/*
 * Map the bank that reported the error to a memory controller, also
 * called a "home agent" (hence "ha").
 *
 * A Sandy Bridge socket has a single memory controller, so the bank
 * number is irrelevant here and the result is always home agent 0.
 */
static u8 sbridge_get_ha(u8 bank)
{
	return 0;
}
1016
+
1017
/*
 * Map the bank that reported the error to a home agent on Ivy Bridge,
 * Haswell and Broadwell:
 *
 *   banks 7 .. 8   home agent banks, ha = bank - 7
 *   banks 9 .. 16  per-channel memory controller banks, four banks per
 *                  home agent, ha = (bank - 9) / 4
 *
 * Returns 0 or 1 for a recognised bank and 0xff for any other bank.
 *
 * The return type is u8, so a negative errno cannot be used to signal
 * failure: -EINVAL would be silently truncated to 0xea. An explicit
 * out-of-range value is returned instead; callers must treat anything
 * other than 0 or 1 as "no such home agent".
 */
static u8 ibridge_get_ha(u8 bank)
{
	switch (bank) {
	case 7 ... 8:
		return bank - 7;
	case 9 ... 16:
		return (bank - 9) / 4;
	default:
		return 0xff;
	}
}
1033
+
1034
/*
 * Not used, but included for safety/symmetry with the other get_ha()
 * callbacks.
 *
 * Always reports "no such home agent". The return type is u8, so the
 * failure value is an explicit 0xff rather than -EINVAL, which would be
 * silently truncated to 0xea.
 */
static u8 knl_get_ha(u8 bank)
{
	return 0xff;
}
1005
1039
1006
1040
static u64 haswell_get_tolm (struct sbridge_pvt * pvt )
1007
1041
{
@@ -2207,6 +2241,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
2207
2241
return 0 ;
2208
2242
}
2209
2243
2244
+ static int get_memory_error_data_from_mce (struct mem_ctl_info * mci ,
2245
+ const struct mce * m , u8 * socket ,
2246
+ u8 * ha , long * channel_mask ,
2247
+ char * msg )
2248
+ {
2249
+ u32 reg , channel = GET_BITFIELD (m -> status , 0 , 3 );
2250
+ struct mem_ctl_info * new_mci ;
2251
+ struct sbridge_pvt * pvt ;
2252
+ struct pci_dev * pci_ha ;
2253
+ bool tad0 ;
2254
+
2255
+ if (channel >= NUM_CHANNELS ) {
2256
+ sprintf (msg , "Invalid channel 0x%x" , channel );
2257
+ return - EINVAL ;
2258
+ }
2259
+
2260
+ pvt = mci -> pvt_info ;
2261
+ if (!pvt -> info .get_ha ) {
2262
+ sprintf (msg , "No get_ha()" );
2263
+ return - EINVAL ;
2264
+ }
2265
+ * ha = pvt -> info .get_ha (m -> bank );
2266
+ if (* ha != 0 && * ha != 1 ) {
2267
+ sprintf (msg , "Impossible bank %d" , m -> bank );
2268
+ return - EINVAL ;
2269
+ }
2270
+
2271
+ * socket = m -> socketid ;
2272
+ new_mci = get_mci_for_node_id (* socket , * ha );
2273
+ if (!new_mci ) {
2274
+ strcpy (msg , "mci socket got corrupted!" );
2275
+ return - EINVAL ;
2276
+ }
2277
+
2278
+ pvt = new_mci -> pvt_info ;
2279
+ pci_ha = pvt -> pci_ha ;
2280
+ pci_read_config_dword (pci_ha , tad_dram_rule [0 ], & reg );
2281
+ tad0 = m -> addr <= TAD_LIMIT (reg );
2282
+
2283
+ * channel_mask = 1 << channel ;
2284
+ if (pvt -> mirror_mode == FULL_MIRRORING ||
2285
+ (pvt -> mirror_mode == ADDR_RANGE_MIRRORING && tad0 )) {
2286
+ * channel_mask |= 1 << ((channel + 2 ) % 4 );
2287
+ pvt -> is_cur_addr_mirrored = true;
2288
+ } else {
2289
+ pvt -> is_cur_addr_mirrored = false;
2290
+ }
2291
+
2292
+ if (pvt -> is_lockstep )
2293
+ * channel_mask |= 1 << ((channel + 1 ) % 4 );
2294
+
2295
+ return 0 ;
2296
+ }
2297
+
2210
2298
/****************************************************************************
2211
2299
Device initialization routines: put/get, init/exit
2212
2300
****************************************************************************/
@@ -2877,10 +2965,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
2877
2965
u32 errcode = GET_BITFIELD (m -> status , 0 , 15 );
2878
2966
u32 channel = GET_BITFIELD (m -> status , 0 , 3 );
2879
2967
u32 optypenum = GET_BITFIELD (m -> status , 4 , 6 );
2968
+ /*
2969
+ * Bits 5-0 of MCi_MISC give the least significant bit that is valid.
2970
+ * A value 6 is for cache line aligned address, a value 12 is for page
2971
+ * aligned address reported by patrol scrubber.
2972
+ */
2973
+ u32 lsb = GET_BITFIELD (m -> misc , 0 , 5 );
2880
2974
long channel_mask , first_channel ;
2881
- u8 rank , socket , ha ;
2975
+ u8 rank = 0xff , socket , ha ;
2882
2976
int rc , dimm ;
2883
- char * area_type = NULL ;
2977
+ char * area_type = "DRAM" ;
2884
2978
2885
2979
if (pvt -> info .type != SANDY_BRIDGE )
2886
2980
recoverable = true;
@@ -2964,9 +3058,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
2964
3058
optype , msg );
2965
3059
}
2966
3060
return ;
2967
- } else {
3061
+ } else if ( lsb < 12 ) {
2968
3062
rc = get_memory_error_data (mci , m -> addr , & socket , & ha ,
2969
- & channel_mask , & rank , & area_type , msg );
3063
+ & channel_mask , & rank ,
3064
+ & area_type , msg );
3065
+ } else {
3066
+ rc = get_memory_error_data_from_mce (mci , m , & socket , & ha ,
3067
+ & channel_mask , msg );
2970
3068
}
2971
3069
2972
3070
if (rc < 0 )
@@ -2981,14 +3079,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
2981
3079
2982
3080
first_channel = find_first_bit (& channel_mask , NUM_CHANNELS );
2983
3081
2984
- if (rank < 4 )
3082
+ if (rank == 0xff )
3083
+ dimm = -1 ;
3084
+ else if (rank < 4 )
2985
3085
dimm = 0 ;
2986
3086
else if (rank < 8 )
2987
3087
dimm = 1 ;
2988
3088
else
2989
3089
dimm = 2 ;
2990
3090
2991
-
2992
3091
/*
2993
3092
* FIXME: On some memory configurations (mirror, lockstep), the
2994
3093
* Memory Controller can't point the error to a single DIMM. The
@@ -3175,6 +3274,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
3175
3274
pvt -> info .dram_rule = ibridge_dram_rule ;
3176
3275
pvt -> info .get_memory_type = get_memory_type ;
3177
3276
pvt -> info .get_node_id = get_node_id ;
3277
+ pvt -> info .get_ha = ibridge_get_ha ;
3178
3278
pvt -> info .rir_limit = rir_limit ;
3179
3279
pvt -> info .sad_limit = sad_limit ;
3180
3280
pvt -> info .interleave_mode = interleave_mode ;
@@ -3199,6 +3299,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
3199
3299
pvt -> info .dram_rule = sbridge_dram_rule ;
3200
3300
pvt -> info .get_memory_type = get_memory_type ;
3201
3301
pvt -> info .get_node_id = get_node_id ;
3302
+ pvt -> info .get_ha = sbridge_get_ha ;
3202
3303
pvt -> info .rir_limit = rir_limit ;
3203
3304
pvt -> info .sad_limit = sad_limit ;
3204
3305
pvt -> info .interleave_mode = interleave_mode ;
@@ -3223,6 +3324,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
3223
3324
pvt -> info .dram_rule = ibridge_dram_rule ;
3224
3325
pvt -> info .get_memory_type = haswell_get_memory_type ;
3225
3326
pvt -> info .get_node_id = haswell_get_node_id ;
3327
+ pvt -> info .get_ha = ibridge_get_ha ;
3226
3328
pvt -> info .rir_limit = haswell_rir_limit ;
3227
3329
pvt -> info .sad_limit = sad_limit ;
3228
3330
pvt -> info .interleave_mode = interleave_mode ;
@@ -3247,6 +3349,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
3247
3349
pvt -> info .dram_rule = ibridge_dram_rule ;
3248
3350
pvt -> info .get_memory_type = haswell_get_memory_type ;
3249
3351
pvt -> info .get_node_id = haswell_get_node_id ;
3352
+ pvt -> info .get_ha = ibridge_get_ha ;
3250
3353
pvt -> info .rir_limit = haswell_rir_limit ;
3251
3354
pvt -> info .sad_limit = sad_limit ;
3252
3355
pvt -> info .interleave_mode = interleave_mode ;
@@ -3271,6 +3374,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
3271
3374
pvt -> info .dram_rule = knl_dram_rule ;
3272
3375
pvt -> info .get_memory_type = knl_get_memory_type ;
3273
3376
pvt -> info .get_node_id = knl_get_node_id ;
3377
+ pvt -> info .get_ha = knl_get_ha ;
3274
3378
pvt -> info .rir_limit = NULL ;
3275
3379
pvt -> info .sad_limit = knl_sad_limit ;
3276
3380
pvt -> info .interleave_mode = knl_interleave_mode ;
0 commit comments