about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  Documentation/acpi/apei/einj.txt    19
-rw-r--r--  Documentation/kernel-parameters.txt  8
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c     4
-rw-r--r--  drivers/acpi/acpi_extlog.c          18
-rw-r--r--  drivers/acpi/apei/einj.c            39
-rw-r--r--  drivers/edac/edac_stub.c            19
-rw-r--r--  drivers/edac/sb_edac.c               6
-rw-r--r--  include/linux/edac.h                28
-rw-r--r--  include/trace/events/ras.h          10
9 files changed, 136 insertions, 15 deletions
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index a58b63da1a36..f51861bcb07b 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -45,11 +45,22 @@ directory apei/einj. The following files are provided.
injection. Before this, please specify all necessary error
parameters.
+- flags
+ Present for kernel version 3.13 and above. Used to specify which
+ of param{1..4} are valid and should be used by BIOS during injection.
+ Value is a bitmask as specified in ACPI5.0 spec for the
+ SET_ERROR_TYPE_WITH_ADDRESS data structure:
+ Bit 0 - Processor APIC field valid (see param3 below)
+ Bit 1 - Memory address and mask valid (param1 and param2)
+ Bit 2 - PCIe (seg,bus,dev,fn) valid (param4 below)
+ If set to zero, legacy behaviour is used where the type of injection
+ specifies just one bit set, and param1 is multiplexed.
+
- param1
This file is used to set the first error parameter value. Effect of
parameter depends on error_type specified. For example, if error
type is memory related type, the param1 should be a valid physical
- memory address.
+ memory address. [Unless "flags" is set - see above]
- param2
This file is used to set the second error parameter value. Effect of
@@ -58,6 +69,12 @@ directory apei/einj. The following files are provided.
address mask. Linux requires page or narrower granularity, say,
0xfffffffffffff000.
+- param3
+ Used when the 0x1 bit is set in "flags" to specify the APIC id
+
+- param4
+ Used when the 0x4 bit is set in "flags" to specify target PCIe device
+
- notrigger
The EINJ mechanism is a two step process. First inject the error, then
perform some actions to trigger it. Setting "notrigger" to 1 skips the
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b9e9bd854298..1e1e0b77aec0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -881,6 +881,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
The xen output can only be used by Xen PV guests.
+ edac_report= [HW,EDAC] Control how to report EDAC event
+ Format: {"on" | "off" | "force"}
+ on: enable EDAC to report H/W event. May be overridden
+ by other higher priority error reporting module.
+ off: disable H/W event reporting through EDAC.
+ force: enforce the use of EDAC to report H/W event.
+ default: on.
+
ekgdboc= [X86,KGDB] Allow early kernel console debugging
ekgdboc=kbd
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b3218cdee95f..a389c1d859ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu)
dev->release = &mce_device_release;
err = device_register(dev);
- if (err)
+ if (err) {
+ put_device(dev);
return err;
+ }
for (i = 0; mce_device_attrs[i]; i++) {
err = device_create_file(dev, mce_device_attrs[i]);
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a6869e110ce5..5d33c5415405 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@
#include <acpi/acpi_bus.h>
#include <linux/cper.h>
#include <linux/ratelimit.h>
+#include <linux/edac.h>
#include <asm/cpu.h>
#include <asm/mce.h>
@@ -43,6 +44,8 @@ struct extlog_l1_head {
u8 rev1[12];
};
+static int old_edac_report_status;
+
static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
/* L1 table related physical address */
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
- return NOTIFY_DONE;
+ return NOTIFY_STOP;
}
static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
@@ -231,8 +234,12 @@ static int __init extlog_init(void)
u64 cap;
int rc;
- rc = -ENODEV;
+ if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
+ pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
+ return -EPERM;
+ }
+ rc = -ENODEV;
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (!(cap & MCG_ELOG_P))
return rc;
@@ -287,6 +294,12 @@ static int __init extlog_init(void)
if (elog_buf == NULL)
goto err_release_elog;
+ /*
+ * eMCA event report method has higher priority than EDAC method,
+ * unless EDAC event report method is mandatory.
+ */
+ old_edac_report_status = get_edac_report_status();
+ set_edac_report_status(EDAC_REPORTING_DISABLED);
mce_register_decode_chain(&extlog_mce_dec);
/* enable OS to be involved to take over management from BIOS */
((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,6 +321,7 @@ err:
static void __exit extlog_exit(void)
{
+ set_edac_report_status(old_edac_report_status);
mce_unregister_decode_chain(&extlog_mce_dec);
((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
if (extlog_l1_addr)
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index fb57d03e698b..c76674e2a01f 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -416,7 +416,8 @@ out:
return rc;
}
-static int __einj_error_inject(u32 type, u64 param1, u64 param2)
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
{
struct apei_exec_context ctx;
u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
@@ -446,6 +447,12 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
break;
}
v5param->flags = vendor_flags;
+ } else if (flags) {
+ v5param->flags = flags;
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->apicid = param3;
+ v5param->pcie_sbdf = param4;
} else {
switch (type) {
case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -514,11 +521,17 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
}
/* Inject the specified hardware error */
-static int einj_error_inject(u32 type, u64 param1, u64 param2)
+static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
{
int rc;
unsigned long pfn;
+ /* If user manually set "flags", make sure it is legal */
+ if (flags && (flags &
+ ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+ return -EINVAL;
+
/*
* We need extra sanity checks for memory errors.
* Other types leap directly to injection.
@@ -532,7 +545,7 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
if (type & ACPI5_VENDOR_BIT) {
if (vendor_flags != SETWA_FLAGS_MEM)
goto inject;
- } else if (!(type & MEM_ERROR_MASK))
+ } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
goto inject;
/*
@@ -546,15 +559,18 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
inject:
mutex_lock(&einj_mutex);
- rc = __einj_error_inject(type, param1, param2);
+ rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
mutex_unlock(&einj_mutex);
return rc;
}
static u32 error_type;
+static u32 error_flags;
static u64 error_param1;
static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
static struct dentry *einj_debug_dir;
static int available_error_type_show(struct seq_file *m, void *v)
@@ -648,7 +664,8 @@ static int error_inject_set(void *data, u64 val)
if (!error_type)
return -EINVAL;
- return einj_error_inject(error_type, error_param1, error_param2);
+ return einj_error_inject(error_type, error_flags, error_param1, error_param2,
+ error_param3, error_param4);
}
DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL,
@@ -729,6 +746,10 @@ static int __init einj_init(void)
rc = -ENOMEM;
einj_param = einj_get_parameter_address();
if ((param_extension || acpi5) && einj_param) {
+ fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_flags);
+ if (!fentry)
+ goto err_unmap;
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param1);
if (!fentry)
@@ -737,6 +758,14 @@ static int __init einj_init(void)
einj_debug_dir, &error_param2);
if (!fentry)
goto err_unmap;
+ fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param3);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param4);
+ if (!fentry)
+ goto err_unmap;
fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
einj_debug_dir, &notrigger);
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 351945fa2ecd..9d9e18aefaaa 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert);
static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
+int edac_report_status = EDAC_REPORTING_ENABLED;
+EXPORT_SYMBOL_GPL(edac_report_status);
+
+static int __init edac_report_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strncmp(str, "on", 2))
+ set_edac_report_status(EDAC_REPORTING_ENABLED);
+ else if (!strncmp(str, "off", 3))
+ set_edac_report_status(EDAC_REPORTING_DISABLED);
+ else if (!strncmp(str, "force", 5))
+ set_edac_report_status(EDAC_REPORTING_FORCE);
+
+ return 0;
+}
+__setup("edac_report=", edac_report_setup);
+
/*
* called to determine if there is an EDAC driver interested in
* knowing an event (such as NMI) occurred
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index d7f1b57bd3be..1229123ccb59 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
struct mem_ctl_info *mci;
struct sbridge_pvt *pvt;
+ if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+ return NOTIFY_DONE;
+
mci = get_mci_for_node_id(mce->socketid);
if (!mci)
return NOTIFY_BAD;
@@ -2142,9 +2145,10 @@ static int __init sbridge_init(void)
opstate_init();
pci_rc = pci_register_driver(&sbridge_driver);
-
if (pci_rc >= 0) {
mce_register_decode_chain(&sbridge_mce_dec);
+ if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+ sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
return 0;
}
diff --git a/include/linux/edac.h b/include/linux/edac.h
index dbdffe8d4469..8e6c20af11a2 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -35,6 +35,34 @@ extern void edac_atomic_assert_error(void);
extern struct bus_type *edac_get_sysfs_subsys(void);
extern void edac_put_sysfs_subsys(void);
+enum {
+ EDAC_REPORTING_ENABLED,
+ EDAC_REPORTING_DISABLED,
+ EDAC_REPORTING_FORCE
+};
+
+extern int edac_report_status;
+#ifdef CONFIG_EDAC
+static inline int get_edac_report_status(void)
+{
+ return edac_report_status;
+}
+
+static inline void set_edac_report_status(int new)
+{
+ edac_report_status = new;
+}
+#else
+static inline int get_edac_report_status(void)
+{
+ return EDAC_REPORTING_DISABLED;
+}
+
+static inline void set_edac_report_status(int new)
+{
+}
+#endif
+
static inline void opstate_init(void)
{
switch (edac_op_state) {
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
index 88b878383797..1c875ad1ee5f 100644
--- a/include/trace/events/ras.h
+++ b/include/trace/events/ras.h
@@ -5,7 +5,7 @@
#define _TRACE_AER_H
#include <linux/tracepoint.h>
-#include <linux/edac.h>
+#include <linux/aer.h>
/*
@@ -63,10 +63,10 @@ TRACE_EVENT(aer_event,
TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
__get_str(dev_name),
- __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
- __entry->severity == HW_EVENT_ERR_FATAL ?
- "Fatal" : "Uncorrected",
- __entry->severity == HW_EVENT_ERR_CORRECTED ?
+ __entry->severity == AER_CORRECTABLE ? "Corrected" :
+ __entry->severity == AER_FATAL ?
+ "Fatal" : "Uncorrected, non-fatal",
+ __entry->severity == AER_CORRECTABLE ?
__print_flags(__entry->status, "|", aer_correctable_errors) :
__print_flags(__entry->status, "|", aer_uncorrectable_errors))
);