aboutsummaryrefslogtreecommitdiff
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig8
-rw-r--r--drivers/acpi/apei/apei-base.c13
-rw-r--r--drivers/acpi/apei/apei-internal.h10
-rw-r--r--drivers/acpi/apei/ghes.c205
-rw-r--r--drivers/acpi/apei/hest.c29
5 files changed, 152 insertions, 113 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index c4dac7150960..b0140c8fc733 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -1,9 +1,15 @@
+config HAVE_ACPI_APEI
+ bool
+
+config HAVE_ACPI_APEI_NMI
+ bool
+
config ACPI_APEI
bool "ACPI Platform Error Interface (APEI)"
select MISC_FILESYSTEMS
select PSTORE
select UEFI_CPER
- depends on X86
+ depends on HAVE_ACPI_APEI
help
APEI allows to report errors (for example from the chipset)
to the operating system. This improves NMI handling
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 8678dfe5366b..2cd7bdd6c8b3 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -745,6 +745,19 @@ struct dentry *apei_get_debugfs_dir(void)
}
EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+int __weak arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr,
+ void *data)
+{
+ return 1;
+}
+EXPORT_SYMBOL_GPL(arch_apei_enable_cmcff);
+
+void __weak arch_apei_report_mem_error(int sev,
+ struct cper_sec_mem_err *mem_err)
+{
+}
+EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);
+
int apei_osc_setup(void)
{
static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index e5bcd919d4e6..16129c78b489 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -121,11 +121,11 @@ struct dentry;
struct dentry *apei_get_debugfs_dir(void);
#define apei_estatus_for_each_section(estatus, section) \
- for (section = (struct acpi_generic_data *)(estatus + 1); \
+ for (section = (struct acpi_hest_generic_data *)(estatus + 1); \
(void *)section - (void *)estatus < estatus->data_length; \
section = (void *)(section+1) + section->error_data_length)
-static inline u32 cper_estatus_len(struct acpi_generic_status *estatus)
+static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
{
if (estatus->raw_data_length)
return estatus->raw_data_offset + \
@@ -135,9 +135,9 @@ static inline u32 cper_estatus_len(struct acpi_generic_status *estatus)
}
void cper_estatus_print(const char *pfx,
- const struct acpi_generic_status *estatus);
-int cper_estatus_check_header(const struct acpi_generic_status *estatus);
-int cper_estatus_check(const struct acpi_generic_status *estatus);
+ const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
int apei_osc_setup(void);
#endif
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index dab7cb7349df..fc5f780bb61d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -47,11 +47,11 @@
#include <linux/genalloc.h>
#include <linux/pci.h>
#include <linux/aer.h>
+#include <linux/nmi.h>
#include <acpi/ghes.h>
-#include <asm/mce.h>
+#include <acpi/apei.h>
#include <asm/tlbflush.h>
-#include <asm/nmi.h>
#include "apei-internal.h"
@@ -74,20 +74,18 @@
#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
- ((struct acpi_generic_status *) \
+ ((struct acpi_hest_generic_status *) \
((struct ghes_estatus_cache *)(estatus_cache) + 1))
#define GHES_ESTATUS_NODE_LEN(estatus_len) \
(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node) \
- ((struct acpi_generic_status *) \
+ ((struct acpi_hest_generic_status *) \
((struct ghes_estatus_node *)(estatus_node) + 1))
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);
-static int ghes_panic_timeout __read_mostly = 30;
-
/*
* All error sources notified with SCI shares one notifier function,
* so they need to be linked and checked one by one. This is applied
@@ -97,16 +95,9 @@ static int ghes_panic_timeout __read_mostly = 30;
* list changing, not for traversing.
*/
static LIST_HEAD(ghes_sci);
-static LIST_HEAD(ghes_nmi);
static DEFINE_MUTEX(ghes_list_mutex);
/*
- * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
- * mutual exclusion.
- */
-static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
-
-/*
* Because the memory area used to transfer hardware error information
* from BIOS to Linux can be determined only in NMI, IRQ or timer
* handler, but general ioremap can not be used in atomic context, so
@@ -114,12 +105,16 @@ static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
*/
/*
- * Two virtual pages are used, one for NMI context, the other for
- * IRQ/PROCESS context
+ * Two virtual pages are used, one for IRQ/PROCESS context, the other for
+ * NMI context (optionally).
*/
-#define GHES_IOREMAP_PAGES 2
-#define GHES_IOREMAP_NMI_PAGE(base) (base)
-#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE)
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+#define GHES_IOREMAP_PAGES 2
+#else
+#define GHES_IOREMAP_PAGES 1
+#endif
+#define GHES_IOREMAP_IRQ_PAGE(base) (base)
+#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE)
/* virtual memory area for atomic ioremap */
static struct vm_struct *ghes_ioremap_area;
@@ -130,18 +125,8 @@ static struct vm_struct *ghes_ioremap_area;
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
-/*
- * printk is not safe in NMI context. So in NMI handler, we allocate
- * required memory from lock-less memory allocator
- * (ghes_estatus_pool), save estatus into it, put them into lock-less
- * list (ghes_estatus_llist), then delay printk into IRQ context via
- * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
- * required pool size by all NMI error source.
- */
static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;
-static struct llist_head ghes_estatus_llist;
-static struct irq_work ghes_proc_irq_work;
struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
@@ -192,7 +177,7 @@ static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
- __flush_tlb_one(vaddr);
+ arch_apei_flush_tlb_one(vaddr);
}
static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
@@ -202,7 +187,7 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
- __flush_tlb_one(vaddr);
+ arch_apei_flush_tlb_one(vaddr);
}
static int ghes_estatus_pool_init(void)
@@ -249,11 +234,6 @@ static int ghes_estatus_pool_expand(unsigned long len)
return 0;
}
-static void ghes_estatus_pool_shrink(unsigned long len)
-{
- ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
-}
-
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
@@ -408,7 +388,7 @@ static void ghes_clear_estatus(struct ghes *ghes)
ghes->flags &= ~GHES_TO_CLEAR;
}
-static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
+static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
unsigned long pfn;
@@ -441,10 +421,10 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
}
static void ghes_do_proc(struct ghes *ghes,
- const struct acpi_generic_status *estatus)
+ const struct acpi_hest_generic_status *estatus)
{
int sev, sec_sev;
- struct acpi_generic_data *gdata;
+ struct acpi_hest_generic_data *gdata;
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -455,9 +435,7 @@ static void ghes_do_proc(struct ghes *ghes,
mem_err = (struct cper_sec_mem_err *)(gdata+1);
ghes_edac_report_mem_error(ghes, sev, mem_err);
-#ifdef CONFIG_X86_MCE
- apei_mce_report_mem_error(sev, mem_err);
-#endif
+ arch_apei_report_mem_error(sev, mem_err);
ghes_handle_memory_failure(gdata, sev);
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
@@ -498,7 +476,7 @@ static void ghes_do_proc(struct ghes *ghes,
static void __ghes_print_estatus(const char *pfx,
const struct acpi_hest_generic *generic,
- const struct acpi_generic_status *estatus)
+ const struct acpi_hest_generic_status *estatus)
{
static atomic_t seqno;
unsigned int curr_seqno;
@@ -520,7 +498,7 @@ static void __ghes_print_estatus(const char *pfx,
static int ghes_print_estatus(const char *pfx,
const struct acpi_hest_generic *generic,
- const struct acpi_generic_status *estatus)
+ const struct acpi_hest_generic_status *estatus)
{
/* Not more than 2 messages every 5 seconds */
static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
@@ -542,13 +520,13 @@ static int ghes_print_estatus(const char *pfx,
* GHES error status reporting throttle, to report more kinds of
* errors, instead of just most frequently occurred errors.
*/
-static int ghes_estatus_cached(struct acpi_generic_status *estatus)
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
u32 len;
int i, cached = 0;
unsigned long long now;
struct ghes_estatus_cache *cache;
- struct acpi_generic_status *cache_estatus;
+ struct acpi_hest_generic_status *cache_estatus;
len = cper_estatus_len(estatus);
rcu_read_lock();
@@ -573,12 +551,12 @@ static int ghes_estatus_cached(struct acpi_generic_status *estatus)
static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
struct acpi_hest_generic *generic,
- struct acpi_generic_status *estatus)
+ struct acpi_hest_generic_status *estatus)
{
int alloced;
u32 len, cache_len;
struct ghes_estatus_cache *cache;
- struct acpi_generic_status *cache_estatus;
+ struct acpi_hest_generic_status *cache_estatus;
alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
@@ -621,7 +599,7 @@ static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
static void ghes_estatus_cache_add(
struct acpi_hest_generic *generic,
- struct acpi_generic_status *estatus)
+ struct acpi_hest_generic_status *estatus)
{
int i, slot = -1, count;
unsigned long long now, duration, period, max_period = 0;
@@ -734,6 +712,32 @@ static int ghes_notify_sci(struct notifier_block *this,
return ret;
}
+static struct notifier_block ghes_notifier_sci = {
+ .notifier_call = ghes_notify_sci,
+};
+
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+/*
+ * printk is not safe in NMI context. So in NMI handler, we allocate
+ * required memory from lock-less memory allocator
+ * (ghes_estatus_pool), save estatus into it, put them into lock-less
+ * list (ghes_estatus_llist), then delay printk into IRQ context via
+ * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
+ * required pool size by all NMI error source.
+ */
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+/*
+ * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
+ * mutual exclusion.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
+
+static LIST_HEAD(ghes_nmi);
+
+static int ghes_panic_timeout __read_mostly = 30;
+
static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
{
struct llist_node *next, *tail = NULL;
@@ -753,7 +757,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
struct llist_node *llnode, *next;
struct ghes_estatus_node *estatus_node;
struct acpi_hest_generic *generic;
- struct acpi_generic_status *estatus;
+ struct acpi_hest_generic_status *estatus;
u32 len, node_len;
llnode = llist_del_all(&ghes_estatus_llist);
@@ -786,7 +790,7 @@ static void ghes_print_queued_estatus(void)
struct llist_node *llnode;
struct ghes_estatus_node *estatus_node;
struct acpi_hest_generic *generic;
- struct acpi_generic_status *estatus;
+ struct acpi_hest_generic_status *estatus;
u32 len, node_len;
llnode = llist_del_all(&ghes_estatus_llist);
@@ -845,7 +849,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
u32 len, node_len;
struct ghes_estatus_node *estatus_node;
- struct acpi_generic_status *estatus;
+ struct acpi_hest_generic_status *estatus;
#endif
if (!(ghes->flags & GHES_TO_CLEAR))
continue;
@@ -877,10 +881,6 @@ out:
return ret;
}
-static struct notifier_block ghes_notifier_sci = {
- .notifier_call = ghes_notify_sci,
-};
-
static unsigned long ghes_esource_prealloc_size(
const struct acpi_hest_generic *generic)
{
@@ -896,11 +896,71 @@ static unsigned long ghes_esource_prealloc_size(
return prealloc_size;
}
+static void ghes_estatus_pool_shrink(unsigned long len)
+{
+ ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
+}
+
+static void ghes_nmi_add(struct ghes *ghes)
+{
+ unsigned long len;
+
+ len = ghes_esource_prealloc_size(ghes->generic);
+ ghes_estatus_pool_expand(len);
+ mutex_lock(&ghes_list_mutex);
+ if (list_empty(&ghes_nmi))
+ register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
+ list_add_rcu(&ghes->list, &ghes_nmi);
+ mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_nmi_remove(struct ghes *ghes)
+{
+ unsigned long len;
+
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ if (list_empty(&ghes_nmi))
+ unregister_nmi_handler(NMI_LOCAL, "ghes");
+ mutex_unlock(&ghes_list_mutex);
+ /*
+ * To synchronize with NMI handler, ghes can only be
+ * freed after NMI handler finishes.
+ */
+ synchronize_rcu();
+ len = ghes_esource_prealloc_size(ghes->generic);
+ ghes_estatus_pool_shrink(len);
+}
+
+static void ghes_nmi_init_cxt(void)
+{
+ init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+}
+#else /* CONFIG_HAVE_ACPI_APEI_NMI */
+static inline void ghes_nmi_add(struct ghes *ghes)
+{
+ pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
+ ghes->generic->header.source_id);
+ BUG();
+}
+
+static inline void ghes_nmi_remove(struct ghes *ghes)
+{
+ pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
+ ghes->generic->header.source_id);
+ BUG();
+}
+
+static inline void ghes_nmi_init_cxt(void)
+{
+}
+#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
+
static int ghes_probe(struct platform_device *ghes_dev)
{
struct acpi_hest_generic *generic;
struct ghes *ghes = NULL;
- unsigned long len;
+
int rc = -EINVAL;
generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -911,7 +971,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
case ACPI_HEST_NOTIFY_POLLED:
case ACPI_HEST_NOTIFY_EXTERNAL:
case ACPI_HEST_NOTIFY_SCI:
+ break;
case ACPI_HEST_NOTIFY_NMI:
+ if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
+ generic->header.source_id);
+ goto err;
+ }
break;
case ACPI_HEST_NOTIFY_LOCAL:
pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
@@ -925,7 +991,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
rc = -EIO;
if (generic->error_block_length <
- sizeof(struct acpi_generic_status)) {
+ sizeof(struct acpi_hest_generic_status)) {
pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
generic->error_block_length,
generic->header.source_id);
@@ -972,14 +1038,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
mutex_unlock(&ghes_list_mutex);
break;
case ACPI_HEST_NOTIFY_NMI:
- len = ghes_esource_prealloc_size(generic);
- ghes_estatus_pool_expand(len);
- mutex_lock(&ghes_list_mutex);
- if (list_empty(&ghes_nmi))
- register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
- "ghes");
- list_add_rcu(&ghes->list, &ghes_nmi);
- mutex_unlock(&ghes_list_mutex);
+ ghes_nmi_add(ghes);
break;
default:
BUG();
@@ -1001,7 +1060,6 @@ static int ghes_remove(struct platform_device *ghes_dev)
{
struct ghes *ghes;
struct acpi_hest_generic *generic;
- unsigned long len;
ghes = platform_get_drvdata(ghes_dev);
generic = ghes->generic;
@@ -1022,18 +1080,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
mutex_unlock(&ghes_list_mutex);
break;
case ACPI_HEST_NOTIFY_NMI:
- mutex_lock(&ghes_list_mutex);
- list_del_rcu(&ghes->list);
- if (list_empty(&ghes_nmi))
- unregister_nmi_handler(NMI_LOCAL, "ghes");
- mutex_unlock(&ghes_list_mutex);
- /*
- * To synchronize with NMI handler, ghes can only be
- * freed after NMI handler finishes.
- */
- synchronize_rcu();
- len = ghes_esource_prealloc_size(generic);
- ghes_estatus_pool_shrink(len);
+ ghes_nmi_remove(ghes);
break;
default:
BUG();
@@ -1077,7 +1124,7 @@ static int __init ghes_init(void)
return -EINVAL;
}
- init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+ ghes_nmi_init_cxt();
rc = ghes_ioremap_init();
if (rc)
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index f5e37f32c71f..06e9b411a0a2 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -36,7 +36,6 @@
#include <linux/io.h>
#include <linux/platform_device.h>
#include <acpi/apei.h>
-#include <asm/mce.h>
#include "apei-internal.h"
@@ -128,33 +127,7 @@ EXPORT_SYMBOL_GPL(apei_hest_parse);
*/
static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
{
-#ifdef CONFIG_X86_MCE
- int i;
- struct acpi_hest_ia_corrected *cmc;
- struct acpi_hest_ia_error_bank *mc_bank;
-
- if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
- return 0;
-
- cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
- if (!cmc->enabled)
- return 0;
-
- /*
- * We expect HEST to provide a list of MC banks that report errors
- * in firmware first mode. Otherwise, return non-zero value to
- * indicate that we are done parsing HEST.
- */
- if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
- return 1;
-
- pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
-
- mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
- for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
- mce_disable_bank(mc_bank->bank_number);
-#endif
- return 1;
+ return arch_apei_enable_cmcff(hest_hdr, data);
}
struct ghes_arr {