diff options
-rw-r--r-- | Documentation/acpi/apei/einj.txt | 19 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 4 | ||||
-rw-r--r-- | drivers/acpi/acpi_extlog.c | 18 | ||||
-rw-r--r-- | drivers/acpi/apei/einj.c | 39 | ||||
-rw-r--r-- | drivers/edac/edac_stub.c | 19 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 6 | ||||
-rw-r--r-- | include/linux/edac.h | 28 | ||||
-rw-r--r-- | include/trace/events/ras.h | 10 |
9 files changed, 136 insertions, 15 deletions
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt index a58b63da1a36..f51861bcb07b 100644 --- a/Documentation/acpi/apei/einj.txt +++ b/Documentation/acpi/apei/einj.txt @@ -45,11 +45,22 @@ directory apei/einj. The following files are provided. injection. Before this, please specify all necessary error parameters. +- flags + Present for kernel version 3.13 and above. Used to specify which + of param{1..4} are valid and should be used by BIOS during injection. + Value is a bitmask as specified in ACPI5.0 spec for the + SET_ERROR_TYPE_WITH_ADDRESS data structure: + Bit 0 - Processor APIC field valid (see param3 below) + Bit 1 - Memory address and mask valid (param1 and param2) + Bit 2 - PCIe (seg,bus,dev,fn) valid (param4 below) + If set to zero, legacy behaviour is used where the type of injection + specifies just one bit set, and param1 is multiplexed. + - param1 This file is used to set the first error parameter value. Effect of parameter depends on error_type specified. For example, if error type is memory related type, the param1 should be a valid physical - memory address. + memory address. [Unless "flag" is set - see above] - param2 This file is used to set the second error parameter value. Effect of @@ -58,6 +69,12 @@ directory apei/einj. The following files are provided. address mask. Linux requires page or narrower granularity, say, 0xfffffffffffff000. +- param3 + Used when the 0x1 bit is set in "flag" to specify the APIC id + +- param4 + Used when the 0x4 bit is set in "flag" to specify target PCIe device + - notrigger The EINJ mechanism is a two step process. First inject the error, then perform some actions to trigger it. Setting "notrigger" to 1 skips the diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b9e9bd854298..1e1e0b77aec0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -881,6 +881,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. The xen output can only be used by Xen PV guests. + edac_report= [HW,EDAC] Control how to report EDAC event + Format: {"on" | "off" | "force"} + on: enable EDAC to report H/W event. May be overridden + by other higher priority error reporting module. + off: disable H/W event reporting through EDAC. + force: enforce the use of EDAC to report H/W event. + default: on. + ekgdboc= [X86,KGDB] Allow early kernel console debugging ekgdboc=kbd diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b3218cdee95f..a389c1d859ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu) dev->release = &mce_device_release; err = device_register(dev); - if (err) + if (err) { + put_device(dev); return err; + } for (i = 0; mce_device_attrs[i]; i++) { err = device_create_file(dev, mce_device_attrs[i]); diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a6869e110ce5..5d33c5415405 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -12,6 +12,7 @@ #include <acpi/acpi_bus.h> #include <linux/cper.h> #include <linux/ratelimit.h> +#include <linux/edac.h> #include <asm/cpu.h> #include <asm/mce.h> @@ -43,6 +44,8 @@ struct extlog_l1_head { u8 rev1[12]; }; +static int old_edac_report_status; + static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; /* L1 table related physical address */ @@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); - return NOTIFY_DONE; + return NOTIFY_STOP; } static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) @@ -231,8 +234,12 @@ static int __init extlog_init(void) u64 cap; int rc; - rc = -ENODEV; + if (get_edac_report_status() == EDAC_REPORTING_FORCE) { + pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); + return -EPERM; + } + rc = -ENODEV; rdmsrl(MSR_IA32_MCG_CAP, cap); if (!(cap & MCG_ELOG_P)) return rc; @@ -287,6 +294,12 @@ static int __init extlog_init(void) if (elog_buf == NULL) goto err_release_elog; + /* + * eMCA event report method has higher priority than EDAC method, + * unless EDAC event report method is mandatory. + */ + old_edac_report_status = get_edac_report_status(); + set_edac_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,6 +321,7 @@ err: static void __exit extlog_exit(void) { + set_edac_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index fb57d03e698b..c76674e2a01f 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -416,7 +416,8 @@ out: return rc; } -static int __einj_error_inject(u32 type, u64 param1, u64 param2) +static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4) { struct apei_exec_context ctx; u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT; @@ -446,6 +447,12 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) break; } v5param->flags = vendor_flags; + } else if (flags) { + v5param->flags = flags; + v5param->memory_address = param1; + v5param->memory_address_range = param2; + v5param->apicid = param3; + v5param->pcie_sbdf = param4; } else { switch (type) { case ACPI_EINJ_PROCESSOR_CORRECTABLE: @@ -514,11 +521,17 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) } /* Inject the specified hardware error */ -static int einj_error_inject(u32 type, u64 param1, u64 param2) +static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4) { int rc; unsigned long pfn; + /* If user manually set "flags", make sure it is legal */ + if (flags && (flags & + ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF))) + return -EINVAL; + /* * We need extra sanity checks for memory errors. * Other types leap directly to injection. @@ -532,7 +545,7 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2) if (type & ACPI5_VENDOR_BIT) { if (vendor_flags != SETWA_FLAGS_MEM) goto inject; - } else if (!(type & MEM_ERROR_MASK)) + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) goto inject; /* @@ -546,15 +559,18 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2) inject: mutex_lock(&einj_mutex); - rc = __einj_error_inject(type, param1, param2); + rc = __einj_error_inject(type, flags, param1, param2, param3, param4); mutex_unlock(&einj_mutex); return rc; } static u32 error_type; +static u32 error_flags; static u64 error_param1; static u64 error_param2; +static u64 error_param3; +static u64 error_param4; static struct dentry *einj_debug_dir; static int available_error_type_show(struct seq_file *m, void *v) @@ -648,7 +664,8 @@ static int error_inject_set(void *data, u64 val) if (!error_type) return -EINVAL; - return einj_error_inject(error_type, error_param1, error_param2); + return einj_error_inject(error_type, error_flags, error_param1, error_param2, + error_param3, error_param4); } DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL, @@ -729,6 +746,10 @@ static int __init einj_init(void) rc = -ENOMEM; einj_param = einj_get_parameter_address(); if ((param_extension || acpi5) && einj_param) { + fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_flags); + if (!fentry) + goto err_unmap; fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir, &error_param1); if (!fentry) @@ -737,6 +758,14 @@ static int __init einj_init(void) einj_debug_dir, &error_param2); if (!fentry) goto err_unmap; + fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param3); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param4); + if (!fentry) + goto err_unmap; fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR, einj_debug_dir, ¬rigger); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 351945fa2ecd..9d9e18aefaaa 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert); static atomic_t edac_subsys_valid = ATOMIC_INIT(0); +int edac_report_status = EDAC_REPORTING_ENABLED; +EXPORT_SYMBOL_GPL(edac_report_status); + +static int __init edac_report_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + set_edac_report_status(EDAC_REPORTING_ENABLED); + else if (!strncmp(str, "off", 3)) + set_edac_report_status(EDAC_REPORTING_DISABLED); + else if (!strncmp(str, "force", 5)) + set_edac_report_status(EDAC_REPORTING_FORCE); + + return 0; +} +__setup("edac_report=", edac_report_setup); + /* * called to determine if there is an EDAC driver interested in * knowing an event (such as NMI) occurred diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index d7f1b57bd3be..1229123ccb59 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; struct sbridge_pvt *pvt; + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + mci = get_mci_for_node_id(mce->socketid); if (!mci) return NOTIFY_BAD; @@ -2142,9 +2145,10 @@ static int __init sbridge_init(void) opstate_init(); pci_rc = pci_register_driver(&sbridge_driver); - if (pci_rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/include/linux/edac.h b/include/linux/edac.h index dbdffe8d4469..8e6c20af11a2 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -35,6 +35,34 @@ extern void edac_atomic_assert_error(void); extern struct bus_type *edac_get_sysfs_subsys(void); extern void edac_put_sysfs_subsys(void); +enum { + EDAC_REPORTING_ENABLED, + EDAC_REPORTING_DISABLED, + EDAC_REPORTING_FORCE +}; + +extern int edac_report_status; +#ifdef CONFIG_EDAC +static inline int get_edac_report_status(void) +{ + return edac_report_status; +} + +static inline void set_edac_report_status(int new) +{ + edac_report_status = new; +} +#else +static inline int get_edac_report_status(void) +{ + return EDAC_REPORTING_DISABLED; +} + +static inline void set_edac_report_status(int new) +{ +} +#endif + static inline void opstate_init(void) { switch (edac_op_state) { diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h index 88b878383797..1c875ad1ee5f 100644 --- a/include/trace/events/ras.h +++ b/include/trace/events/ras.h @@ -5,7 +5,7 @@ #define _TRACE_AER_H #include <linux/tracepoint.h> -#include <linux/edac.h> +#include <linux/aer.h> /* @@ -63,10 +63,10 @@ TRACE_EVENT(aer_event, TP_printk("%s PCIe Bus Error: severity=%s, %s\n", __get_str(dev_name), - __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : - __entry->severity == HW_EVENT_ERR_FATAL ? - "Fatal" : "Uncorrected", - __entry->severity == HW_EVENT_ERR_CORRECTED ? + __entry->severity == AER_CORRECTABLE ? "Corrected" : + __entry->severity == AER_FATAL ? + "Fatal" : "Uncorrected, non-fatal", + __entry->severity == AER_CORRECTABLE ? __print_flags(__entry->status, "|", aer_correctable_errors) : __print_flags(__entry->status, "|", aer_uncorrectable_errors)) ); |