- Specify what error addresses reported on AMD are actually usable
memory error addresses for further decoding -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmU7kb8ACgkQEsHwGGHe VUq3/A//VTrsON+RRS+M7PVewXMiTbwjVytum/9gWXtuUBEFdWCQjCe4TSaI6+mX v8inAomBE7s3SoQYkosF1VO2l0r68aJLOm6hczzbjz+ZjGvramDiv5qCs0iMM8m4 Nvwyjeo1+2G6JeaX2rR7fqnZkA4NcYE1/s05pksNEaXMsAhpSOWenRgUK1EyQXLE y1u63G5GLMT4cpjEmEcbp9Lb02WwQzB9inZ1f4MFoujkI5VJ/9b68D+DpGwHd4Ag HNrg6LR/YpVwioVnsa+xEiQSxxwuRCHvS8kbc27d3qhfT4cRhmtAIsHYkyeO75TJ jkXU1Gme/k2RDEYHOz7heSVWgmG3y9/swc3UZJFE0QAnjdajY7mUsM9+o5uCm4Y6 rALf8z7t0+oMpG1YML5Y+0wvgcPk9pih6Mm9tbBlFCXPi2OQ5bieNkHe7RQXHcQx xwFoQI0ByWvW7omu+jqA8iN4YSLaQhST2wzghPF1Wu7KAewu5lpU+9kmgmL7utme aHIQFdhRFusYEABqlr8XQSew5FMtIfOeWWCdgWHghUQp6LCsA0QeUxcQR9VdY9Th IgY1j4G2lQeLpDlnWE9VPMCWk4cuTABRyVWEu1B4wScU3xRWD8jOh+LcF76RdoYz k7GW0d68DGDCRgU7q86LNM/bNH0zyIIteTj64uHBzQm0ygJcdZU= =NcVX -----END PGP SIGNATURE----- Merge tag 'ras_core_for_6.7_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 RAS updates from Borislav Petkov: - Specify what error addresses reported on AMD are actually usable memory error addresses for further decoding * tag 'ras_core_for_6.7_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mce: Cleanup mce_usable_address() x86/mce: Define amd_mce_usable_address() x86/MCE/AMD: Split amd_mce_is_memory_error()
This commit is contained in:
commit
01ae815c50
5 changed files with 100 additions and 28 deletions
|
@ -245,7 +245,7 @@ static inline void cmci_recheck(void) {}
|
|||
int mce_available(struct cpuinfo_x86 *c);
|
||||
bool mce_is_memory_error(struct mce *m);
|
||||
bool mce_is_correctable(struct mce *m);
|
||||
int mce_usable_address(struct mce *m);
|
||||
bool mce_usable_address(struct mce *m);
|
||||
|
||||
DECLARE_PER_CPU(unsigned, mce_exception_count);
|
||||
DECLARE_PER_CPU(unsigned, mce_poll_count);
|
||||
|
|
|
@ -713,17 +713,75 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
|||
deferred_error_interrupt_enable(c);
|
||||
}
|
||||
|
||||
bool amd_mce_is_memory_error(struct mce *m)
|
||||
/*
|
||||
* DRAM ECC errors are reported in the Northbridge (bank 4) with
|
||||
* Extended Error Code 8.
|
||||
*/
|
||||
static bool legacy_mce_is_memory_error(struct mce *m)
|
||||
{
|
||||
return m->bank == 4 && XEC(m->status, 0x1f) == 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* DRAM ECC errors are reported in Unified Memory Controllers with
|
||||
* Extended Error Code 0.
|
||||
*/
|
||||
static bool smca_mce_is_memory_error(struct mce *m)
|
||||
{
|
||||
enum smca_bank_types bank_type;
|
||||
/* ErrCodeExt[20:16] */
|
||||
u8 xec = (m->status >> 16) & 0x1f;
|
||||
|
||||
if (XEC(m->status, 0x3f))
|
||||
return false;
|
||||
|
||||
bank_type = smca_get_bank_type(m->extcpu, m->bank);
|
||||
if (mce_flags.smca)
|
||||
return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
|
||||
|
||||
return m->bank == 4 && xec == 0x8;
|
||||
return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
|
||||
}
|
||||
|
||||
bool amd_mce_is_memory_error(struct mce *m)
|
||||
{
|
||||
if (mce_flags.smca)
|
||||
return smca_mce_is_memory_error(m);
|
||||
else
|
||||
return legacy_mce_is_memory_error(m);
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD systems do not have an explicit indicator that the value in MCA_ADDR is
|
||||
* a system physical address. Therefore, individual cases need to be detected.
|
||||
* Future cases and checks will be added as needed.
|
||||
*
|
||||
* 1) General case
|
||||
* a) Assume address is not usable.
|
||||
* 2) Poison errors
|
||||
* a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy
|
||||
* northbridge (bank 4).
|
||||
* b) Refers to poison consumption in the core. Does not include "no action",
|
||||
* "action optional", or "deferred" error severities.
|
||||
* c) Will include a usable address so that immediate action can be taken.
|
||||
* 3) Northbridge DRAM ECC errors
|
||||
* a) Reported in legacy bank 4 with extended error code (XEC) 8.
|
||||
* b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore,
|
||||
* this bit should not be checked.
|
||||
*
|
||||
* NOTE: SMCA UMC memory errors fall into case #1.
|
||||
*/
|
||||
bool amd_mce_usable_address(struct mce *m)
|
||||
{
|
||||
/* Check special northbridge case 3) first. */
|
||||
if (!mce_flags.smca) {
|
||||
if (legacy_mce_is_memory_error(m))
|
||||
return true;
|
||||
else if (m->bank == 4)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check poison bit for all other bank types. */
|
||||
if (m->status & MCI_STATUS_POISON)
|
||||
return true;
|
||||
|
||||
/* Assume address is not usable for all others. */
|
||||
return false;
|
||||
}
|
||||
|
||||
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
|
||||
|
|
|
@ -453,32 +453,22 @@ static void mce_irq_work_cb(struct irq_work *entry)
|
|||
mce_schedule_work();
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the address reported by the CPU is in a format we can parse.
|
||||
* It would be possible to add code for most other cases, but all would
|
||||
* be somewhat complicated (e.g. segment offset would require an instruction
|
||||
* parser). So only support physical addresses up to page granularity for now.
|
||||
*/
|
||||
int mce_usable_address(struct mce *m)
|
||||
bool mce_usable_address(struct mce *m)
|
||||
{
|
||||
if (!(m->status & MCI_STATUS_ADDRV))
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
/* Checks after this one are Intel/Zhaoxin-specific: */
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
|
||||
boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
|
||||
return 1;
|
||||
switch (m->cpuvendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
return amd_mce_usable_address(m);
|
||||
|
||||
if (!(m->status & MCI_STATUS_MISCV))
|
||||
return 0;
|
||||
case X86_VENDOR_INTEL:
|
||||
case X86_VENDOR_ZHAOXIN:
|
||||
return intel_mce_usable_address(m);
|
||||
|
||||
if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
|
||||
return 0;
|
||||
|
||||
if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mce_usable_address);
|
||||
|
||||
|
|
|
@ -536,3 +536,23 @@ bool intel_filter_mce(struct mce *m)
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the address reported by the CPU is in a format we can parse.
|
||||
* It would be possible to add code for most other cases, but all would
|
||||
* be somewhat complicated (e.g. segment offset would require an instruction
|
||||
* parser). So only support physical addresses up to page granularity for now.
|
||||
*/
|
||||
bool intel_mce_usable_address(struct mce *m)
|
||||
{
|
||||
if (!(m->status & MCI_STATUS_MISCV))
|
||||
return false;
|
||||
|
||||
if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
|
||||
return false;
|
||||
|
||||
if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ void intel_init_cmci(void);
|
|||
void intel_init_lmce(void);
|
||||
void intel_clear_lmce(void);
|
||||
bool intel_filter_mce(struct mce *m);
|
||||
bool intel_mce_usable_address(struct mce *m);
|
||||
#else
|
||||
# define cmci_intel_adjust_timer mce_adjust_timer_default
|
||||
static inline bool mce_intel_cmci_poll(void) { return false; }
|
||||
|
@ -58,6 +59,7 @@ static inline void intel_init_cmci(void) { }
|
|||
static inline void intel_init_lmce(void) { }
|
||||
static inline void intel_clear_lmce(void) { }
|
||||
static inline bool intel_filter_mce(struct mce *m) { return false; }
|
||||
static inline bool intel_mce_usable_address(struct mce *m) { return false; }
|
||||
#endif
|
||||
|
||||
void mce_timer_kick(unsigned long interval);
|
||||
|
@ -210,6 +212,7 @@ extern bool filter_mce(struct mce *m);
|
|||
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
extern bool amd_filter_mce(struct mce *m);
|
||||
bool amd_mce_usable_address(struct mce *m);
|
||||
|
||||
/*
|
||||
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
|
||||
|
@ -237,6 +240,7 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
|
|||
|
||||
#else
|
||||
static inline bool amd_filter_mce(struct mce *m) { return false; }
|
||||
static inline bool amd_mce_usable_address(struct mce *m) { return false; }
|
||||
static inline void smca_extract_err_addr(struct mce *m) { }
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in a new issue