anolis: efi: cper: Add Zhaoxin/Centaur ZDI/ZPI error decode

ANBZ: #9416

ZPI is the interconnection interface between sockets, ZDI is the
interconnection interface between dies.

When either ZDI or ZPI encounters an error, it triggers an SMI interrupt.
The SMI handler reads the error information from the ZDI/ZPI configuration
space, fills it into the CPER structure associated with the error, and
raises an SCI or NMI interrupt to notify the OS. The OS driver then decodes
the CPER structure to help the user analyze the error.

Because the UEFI spec does not define a section type for ZDI/ZPI errors,
Zhaoxin reports ZDI/ZPI errors using the error format defined by the
Generic Processor Error Section type. When an error occurs, the BIOS fills
the error information into the data structure corresponding to the Generic
Processor Error Section type in the SMI handler. However, the error
information printed by the default APEI driver is not easy to read.

This patch adds extra log output that makes the ZDI/ZPI error information
easier to read on Zhaoxin/Centaur CPUs.

Signed-off-by: leoliu-oc <leoliu-oc@zhaoxin.com>
Reviewed-by: Guixin Liu <kanie@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/3422
This commit is contained in:
leoliu-oc 2024-06-25 17:29:35 +08:00 committed by 小龙
parent a4a9d3d466
commit ab24de3925
2 changed files with 55 additions and 0 deletions

View File

@ -140,6 +140,54 @@ static const char * const proc_flag_strs[] = {
"corrected",
};
static const char * const zdi_zpi_err_type_strs[] = {
"No Error",
"Training Error Status (PHY)",
"Data Link Protocol Error Status (DLL)",
"Surprise Down Error Status",
"Flow Control Protocol Error Status (TL)",
"Receiver Overflow Status (TL)",
"Receiver Error Status (PHY)",
"Bad TLP Status (DLL)",
"Bad Data Link Layer Packet (DLLP) Status (DLL)",
"REPLAY_NUM Rollover Status (DLL)",
"Replay Timer Timeout Status (DLL)",
"X16 Link Width Unreliable Status",
"ZPI X8 Link Width Unreliable Status",
"ZPI X4 Link Width Unreliable Status",
"ZPI X2 Link Width Unreliable Status",
"ZPI Gen3 Link Speed Unreliable Status",
"ZPI Gen2 Link Speed Unreliable Status",
"ZDI Gen3 Link Speed Unreliable Status",
"ZDI Gen4 Link Speed Unreliable Status",
};
/**
 * cper_zdi_zpi_err_type_str - describe a ZDI/ZPI error type code
 * @etype: error type code, used as an index into zdi_zpi_err_type_strs
 *
 * Return: the matching entry of zdi_zpi_err_type_strs, or "unknown error"
 * when @etype lies beyond the end of that table.
 */
const char *cper_zdi_zpi_err_type_str(unsigned int etype)
{
	if (etype >= ARRAY_SIZE(zdi_zpi_err_type_strs))
		return "unknown error";

	return zdi_zpi_err_type_strs[etype];
}
EXPORT_SYMBOL_GPL(cper_zdi_zpi_err_type_str);
static void cper_print_proc_generic_zdi_zpi(const char *pfx,
const struct cper_sec_proc_generic *zdi_zpi)
{
u8 etype = zdi_zpi->responder_id;
if ((zdi_zpi->requestor_id & 0xff) == 7) {
pr_info("%s general processor error(zpi error)\n", pfx);
} else if ((zdi_zpi->requestor_id & 0xff) == 6) {
pr_info("%s general processor error(zdi error)\n", pfx);
} else {
pr_info("%s general processor error(unknown error)\n", pfx);
return;
}
pr_info("%s bus number %llx device number %llx function number 0\n", pfx,
((zdi_zpi->requestor_id)>>8) & 0xff, zdi_zpi->requestor_id & 0xff);
pr_info("%s apic id %lld error_type: %s\n", pfx, zdi_zpi->proc_id,
cper_zdi_zpi_err_type_str(etype));
}
static void cper_print_proc_generic(const char *pfx,
const struct cper_sec_proc_generic *proc)
{
@ -183,6 +231,11 @@ static void cper_print_proc_generic(const char *pfx,
pfx, proc->responder_id);
if (proc->validation_bits & CPER_PROC_VALID_IP)
printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
#if IS_ENABLED(CONFIG_X86)
if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
cper_print_proc_generic_zdi_zpi(pfx, proc);
#endif
}
static const char * const mem_err_type_strs[] = {

View File

@ -572,4 +572,6 @@ void cper_print_proc_ia(const char *pfx,
int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg);
int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg);
/*
 * Return the description string for a ZDI/ZPI error type code, or
 * "unknown error" when the code is outside the known table.
 */
const char *cper_zdi_zpi_err_type_str(unsigned int etype);
#endif