Skip to content

Commit 8f0b805

Browse files
maheshsal authored and mpe committed
powerpc/pseries: Display machine check error details.
Extract the MCE error details from the RTAS extended log and display them on the console. With this patch you should now see MCE logs like the one below:

[ 142.371818] Severe Machine check interrupt [Recovered]
[ 142.371822] NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel]
[ 142.371822] Initiator: CPU
[ 142.371823] Error type: SLB [Multihit]
[ 142.371824] Effective address: d00000000ca70000

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
1 parent a43c159 commit 8f0b805

File tree

2 files changed

+138
-0
lines changed

2 files changed

+138
-0
lines changed

arch/powerpc/include/asm/rtas.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,11 @@ static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
197197
return (elog->byte1 & 0x04) >> 2;
198198
}
199199

200+
static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
201+
{
202+
return (elog->byte2 & 0xf0) >> 4;
203+
}
204+
200205
#define rtas_error_type(x) ((x)->byte3)
201206

202207
static inline

arch/powerpc/platforms/pseries/ras.c

Lines changed: 133 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -523,6 +523,136 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
523523
return 0; /* need to perform reset */
524524
}
525525

526+
/*
 * Look up the name for index @val in the string table @ar, yielding
 * "Unknown" when @val is not below ARRAY_SIZE(@ar).
 *
 * @ar:  must be an actual array, because ARRAY_SIZE() is applied to it.
 *       It is parenthesized in the subscript so that an expression
 *       argument (e.g. *tablep) indexes the intended object.
 * @val: index to look up; it is evaluated twice, so it must not have
 *       side effects.
 */
#define VAL_TO_STRING(ar, val)	\
	(((val) < ARRAY_SIZE(ar)) ? (ar)[(val)] : "Unknown")
528+
529+
static void pseries_print_mce_info(struct pt_regs *regs,
530+
struct rtas_error_log *errp)
531+
{
532+
const char *level, *sevstr;
533+
struct pseries_errorlog *pseries_log;
534+
struct pseries_mc_errorlog *mce_log;
535+
u8 error_type, err_sub_type;
536+
u64 addr;
537+
u8 initiator = rtas_error_initiator(errp);
538+
int disposition = rtas_error_disposition(errp);
539+
540+
static const char * const initiators[] = {
541+
"Unknown",
542+
"CPU",
543+
"PCI",
544+
"ISA",
545+
"Memory",
546+
"Power Mgmt",
547+
};
548+
static const char * const mc_err_types[] = {
549+
"UE",
550+
"SLB",
551+
"ERAT",
552+
"TLB",
553+
"D-Cache",
554+
"Unknown",
555+
"I-Cache",
556+
};
557+
static const char * const mc_ue_types[] = {
558+
"Indeterminate",
559+
"Instruction fetch",
560+
"Page table walk ifetch",
561+
"Load/Store",
562+
"Page table walk Load/Store",
563+
};
564+
565+
/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
566+
static const char * const mc_slb_types[] = {
567+
"Parity",
568+
"Multihit",
569+
"Indeterminate",
570+
};
571+
572+
/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
573+
static const char * const mc_soft_types[] = {
574+
"Unknown",
575+
"Parity",
576+
"Multihit",
577+
"Indeterminate",
578+
};
579+
580+
if (!rtas_error_extended(errp)) {
581+
pr_err("Machine check interrupt: Missing extended error log\n");
582+
return;
583+
}
584+
585+
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
586+
if (pseries_log == NULL)
587+
return;
588+
589+
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
590+
591+
error_type = mce_log->error_type;
592+
err_sub_type = rtas_mc_error_sub_type(mce_log);
593+
594+
switch (rtas_error_severity(errp)) {
595+
case RTAS_SEVERITY_NO_ERROR:
596+
level = KERN_INFO;
597+
sevstr = "Harmless";
598+
break;
599+
case RTAS_SEVERITY_WARNING:
600+
level = KERN_WARNING;
601+
sevstr = "";
602+
break;
603+
case RTAS_SEVERITY_ERROR:
604+
case RTAS_SEVERITY_ERROR_SYNC:
605+
level = KERN_ERR;
606+
sevstr = "Severe";
607+
break;
608+
case RTAS_SEVERITY_FATAL:
609+
default:
610+
level = KERN_ERR;
611+
sevstr = "Fatal";
612+
break;
613+
}
614+
615+
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
616+
disposition == RTAS_DISP_FULLY_RECOVERED ?
617+
"Recovered" : "Not recovered");
618+
if (user_mode(regs)) {
619+
printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
620+
regs->nip, current->pid, current->comm);
621+
} else {
622+
printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
623+
(void *)regs->nip);
624+
}
625+
printk("%s Initiator: %s\n", level,
626+
VAL_TO_STRING(initiators, initiator));
627+
628+
switch (error_type) {
629+
case MC_ERROR_TYPE_UE:
630+
printk("%s Error type: %s [%s]\n", level,
631+
VAL_TO_STRING(mc_err_types, error_type),
632+
VAL_TO_STRING(mc_ue_types, err_sub_type));
633+
break;
634+
case MC_ERROR_TYPE_SLB:
635+
printk("%s Error type: %s [%s]\n", level,
636+
VAL_TO_STRING(mc_err_types, error_type),
637+
VAL_TO_STRING(mc_slb_types, err_sub_type));
638+
break;
639+
case MC_ERROR_TYPE_ERAT:
640+
case MC_ERROR_TYPE_TLB:
641+
printk("%s Error type: %s [%s]\n", level,
642+
VAL_TO_STRING(mc_err_types, error_type),
643+
VAL_TO_STRING(mc_soft_types, err_sub_type));
644+
break;
645+
default:
646+
printk("%s Error type: %s\n", level,
647+
VAL_TO_STRING(mc_err_types, error_type));
648+
break;
649+
}
650+
651+
addr = rtas_mc_get_effective_addr(mce_log);
652+
if (addr)
653+
printk("%s Effective address: %016llx\n", level, addr);
654+
}
655+
526656
static int mce_handle_error(struct rtas_error_log *errp)
527657
{
528658
struct pseries_errorlog *pseries_log;
@@ -585,8 +715,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
585715
int recovered = 0;
586716
int disposition = rtas_error_disposition(err);
587717

718+
pseries_print_mce_info(regs, err);
719+
588720
if (!(regs->msr & MSR_RI)) {
589721
/* If MSR_RI isn't set, we cannot recover */
722+
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
590723
recovered = 0;
591724

592725
} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {

0 commit comments

Comments
 (0)