From 71e0940d52e107748b270213a01d3b1546657d74 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Sep 2018 09:32:44 -0700 Subject: efi: honour memory reservations passed via a linux specific config table In order to allow the OS to reserve memory persistently across a kexec, introduce a Linux-specific UEFI configuration table that points to the head of a linked list in memory, allowing each kernel to add list items describing memory regions that the next kernel should treat as reserved. This is useful, e.g., for GICv3 based ARM systems that cannot disable DMA access to the LPI tables, forcing them to reuse the same memory region again after a kexec reboot. Tested-by: Jeremy Linton Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 401e4b254e30..a5cb580472c5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -672,6 +672,7 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) +#define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) typedef struct { efi_guid_t guid; @@ -957,6 +958,7 @@ extern struct efi { unsigned long mem_attr_table; /* memory attributes table */ unsigned long rng_seed; /* UEFI firmware random seed */ unsigned long tpm_log; /* TPM2 Event Log table */ + unsigned long mem_reserve; /* Linux EFI memreserve table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; @@ -1662,4 +1664,10 @@ extern int efi_tpm_eventlog_init(void); /* Workqueue to queue EFI Runtime Services */ extern struct workqueue_struct *efi_rts_wq; +struct linux_efi_memreserve { + phys_addr_t next; + phys_addr_t base; + phys_addr_t size; +}; + #endif /* _LINUX_EFI_H */ -- cgit From a23d3bb05ccbd815c79293d2207fedede0b3515d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Sep 2018 09:32:46 -0700 Subject: efi: add API to reserve memory persistently across kexec reboot Add kernel plumbing to reserve memory regions persistently on a EFI system by adding entries to the MEMRESERVE linked list. Tested-by: Jeremy Linton Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 32 ++++++++++++++++++++++++++++++++ include/linux/efi.h | 1 + 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 688132ac8a0a..249eb70691b0 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -962,6 +962,38 @@ bool efi_is_table_address(unsigned long phys_addr) return false; } +static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); + +int efi_mem_reserve_persistent(phys_addr_t addr, u64 size) +{ + struct linux_efi_memreserve *rsv, *parent; + + if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR) + return -ENODEV; + + rsv = kmalloc(sizeof(*rsv), GFP_KERNEL); + if (!rsv) + return -ENOMEM; + + parent = memremap(efi.mem_reserve, sizeof(*rsv), MEMREMAP_WB); + if (!parent) { + kfree(rsv); + return -ENOMEM; + } + + rsv->base = addr; + rsv->size = size; + + spin_lock(&efi_mem_reserve_persistent_lock); + rsv->next = parent->next; + parent->next = __pa(rsv); + spin_unlock(&efi_mem_reserve_persistent_lock); + + memunmap(parent); + + return 0; +} + #ifdef CONFIG_KEXEC static int update_efi_random_seed(struct notifier_block *nb, unsigned long code, void *unused) diff --git a/include/linux/efi.h b/include/linux/efi.h index a5cb580472c5..22e4de9d3700 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1043,6 +1043,7 @@ extern int __init efi_uart_console_only (void); extern u64 efi_mem_desc_end(efi_memory_desc_t *md); extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_mem_reserve(phys_addr_t addr, u64 size); +extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_reserve_boot_services(void); -- cgit From 9dbbedaa6171247c4c7c40b83f05b200a117c2e0 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Tue, 11 Sep 2018 12:15:21 -0700 Subject: efi: Make efi_rts_work accessible to efi page fault handler After the kernel has booted, if any accesses by firmware causes a page fault, the efi page fault handler would freeze efi_rts_wq and schedules a new process. To do this, the efi page fault handler needs efi_rts_work. Hence, make it accessible. There will be no race conditions in accessing this structure, because all the calls to efi runtime services are already serialized. Tested-by: Bhupesh Sharma Suggested-by: Matt Fleming Based-on-code-from: Ricardo Neri Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/runtime-wrappers.c | 53 ++++++--------------------------- include/linux/efi.h | 36 ++++++++++++++++++++++ 2 files changed, 45 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index aa66cbf23512..b18b2d864c2c 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -45,39 +45,7 @@ #define __efi_call_virt(f, args...) \ __efi_call_virt_pointer(efi.systab->runtime, f, args) -/* efi_runtime_service() function identifiers */ -enum efi_rts_ids { - GET_TIME, - SET_TIME, - GET_WAKEUP_TIME, - SET_WAKEUP_TIME, - GET_VARIABLE, - GET_NEXT_VARIABLE, - SET_VARIABLE, - QUERY_VARIABLE_INFO, - GET_NEXT_HIGH_MONO_COUNT, - UPDATE_CAPSULE, - QUERY_CAPSULE_CAPS, -}; - -/* - * efi_runtime_work: Details of EFI Runtime Service work - * @arg<1-5>: EFI Runtime Service function arguments - * @status: Status of executing EFI Runtime Service - * @efi_rts_id: EFI Runtime Service function identifier - * @efi_rts_comp: Struct used for handling completions - */ -struct efi_runtime_work { - void *arg1; - void *arg2; - void *arg3; - void *arg4; - void *arg5; - efi_status_t status; - struct work_struct work; - enum efi_rts_ids efi_rts_id; - struct completion efi_rts_comp; -}; +struct efi_runtime_work efi_rts_work; /* * efi_queue_work: Queue efi_runtime_service() and wait until it's done @@ -91,7 +59,6 @@ struct efi_runtime_work { */ #define efi_queue_work(_rts, _arg1, _arg2, _arg3, _arg4, _arg5) \ ({ \ - struct efi_runtime_work efi_rts_work; \ efi_rts_work.status = EFI_ABORTED; \ \ init_completion(&efi_rts_work.efi_rts_comp); \ @@ -184,18 +151,16 @@ static DEFINE_SEMAPHORE(efi_runtime_lock); */ static void efi_call_rts(struct work_struct *work) { - struct efi_runtime_work *efi_rts_work; void *arg1, *arg2, *arg3, *arg4, *arg5; efi_status_t status = EFI_NOT_FOUND; - efi_rts_work = container_of(work, struct efi_runtime_work, work); - arg1 = efi_rts_work->arg1; - arg2 = efi_rts_work->arg2; - arg3 = efi_rts_work->arg3; - arg4 = efi_rts_work->arg4; - arg5 = efi_rts_work->arg5; + arg1 = efi_rts_work.arg1; + arg2 = efi_rts_work.arg2; + arg3 = efi_rts_work.arg3; + arg4 = efi_rts_work.arg4; + arg5 = efi_rts_work.arg5; - switch (efi_rts_work->efi_rts_id) { + switch (efi_rts_work.efi_rts_id) { case GET_TIME: status = efi_call_virt(get_time, (efi_time_t *)arg1, (efi_time_cap_t *)arg2); @@ -253,8 +218,8 @@ static void efi_call_rts(struct work_struct *work) */ pr_err("Requested executing invalid EFI Runtime Service.\n"); } - efi_rts_work->status = status; - complete(&efi_rts_work->efi_rts_comp); + efi_rts_work.status = status; + complete(&efi_rts_work.efi_rts_comp); } static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) diff --git a/include/linux/efi.h b/include/linux/efi.h index 22e4de9d3700..a929d2bf41fa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1662,6 +1662,42 @@ struct linux_efi_tpm_eventlog { extern int efi_tpm_eventlog_init(void); +/* efi_runtime_service() function identifiers */ +enum efi_rts_ids { + GET_TIME, + SET_TIME, + GET_WAKEUP_TIME, + SET_WAKEUP_TIME, + GET_VARIABLE, + GET_NEXT_VARIABLE, + SET_VARIABLE, + QUERY_VARIABLE_INFO, + GET_NEXT_HIGH_MONO_COUNT, + UPDATE_CAPSULE, + QUERY_CAPSULE_CAPS, +}; + +/* + * efi_runtime_work: Details of EFI Runtime Service work + * @arg<1-5>: EFI Runtime Service function arguments + * @status: Status of executing EFI Runtime Service + * @efi_rts_id: EFI Runtime Service function identifier + * @efi_rts_comp: Struct used for handling completions + */ +struct efi_runtime_work { + void *arg1; + void *arg2; + void *arg3; + void *arg4; + void *arg5; + efi_status_t status; + struct work_struct work; + enum efi_rts_ids efi_rts_id; + struct completion efi_rts_comp; +}; + +extern struct efi_runtime_work efi_rts_work; + /* Workqueue to queue EFI Runtime Services */ extern struct workqueue_struct *efi_rts_wq; -- cgit From 3425d934fc0312f62024163736a7afe4de20c10f Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Tue, 11 Sep 2018 12:15:22 -0700 Subject: efi/x86: Handle page faults occurring while running EFI runtime services Memory accesses performed by UEFI runtime services should be limited to: - reading/executing from EFI_RUNTIME_SERVICES_CODE memory regions - reading/writing from/to EFI_RUNTIME_SERVICES_DATA memory regions - reading/writing by-ref arguments - reading/writing from/to the stack. Accesses outside these regions may cause the kernel to hang because the memory region requested by the firmware isn't mapped in efi_pgd, which causes a page fault in ring 0 and the kernel fails to handle it, leading to die(). To save kernel from hanging, add an EFI specific page fault handler which recovers from such faults by 1. If the efi runtime service is efi_reset_system(), reboot the machine through BIOS. 2. If the efi runtime service is _not_ efi_reset_system(), then freeze efi_rts_wq and schedule a new process. The EFI page fault handler offers us two advantages: 1. Avoid potential hangs caused by buggy firmware. 2. Shout loud that the firmware is buggy and hence is not a kernel bug. Tested-by: Bhupesh Sharma Suggested-by: Matt Fleming Based-on-code-from: Ricardo Neri Signed-off-by: Sai Praneeth Prakhya Reviewed-by: Thomas Gleixner [ardb: clarify commit log] Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 1 + arch/x86/mm/fault.c | 9 ++++ arch/x86/platform/efi/quirks.c | 78 +++++++++++++++++++++++++++++++++ drivers/firmware/efi/runtime-wrappers.c | 8 ++++ include/linux/efi.h | 8 +++- 5 files changed, 103 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cec5fae23eb3..eea40d52ca78 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -140,6 +140,7 @@ extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); +extern void efi_recover_from_page_fault(unsigned long phys_addr); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 47bebfe6efa7..a5b9ddb0f1fe 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -16,6 +16,7 @@ #include /* prefetchw */ #include /* exception_enter(), ... */ #include /* faulthandler_disabled() */ +#include /* efi_recover_from_page_fault()*/ #include #include /* boot_cpu_has, ... */ @@ -25,6 +26,7 @@ #include /* emulate_vsyscall */ #include /* struct vm86 */ #include /* vma_pkey() */ +#include /* efi_recover_from_page_fault()*/ #define CREATE_TRACE_POINTS #include @@ -788,6 +790,13 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (is_errata93(regs, address)) return; + /* + * Buggy firmware could access regions which might page fault, try to + * recover from such faults. + */ + if (IS_ENABLED(CONFIG_EFI)) + efi_recover_from_page_fault(address); + /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice: diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 844d31cb8a0c..669babcaf245 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -16,6 +16,7 @@ #include #include #include +#include #define EFI_MIN_RESERVE 5120 @@ -654,3 +655,80 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, } #endif + +/* + * If any access by any efi runtime service causes a page fault, then, + * 1. If it's efi_reset_system(), reboot through BIOS. + * 2. If any other efi runtime service, then + * a. Return error status to the efi caller process. + * b. Disable EFI Runtime Services forever and + * c. Freeze efi_rts_wq and schedule new process. + * + * @return: Returns, if the page fault is not handled. This function + * will never return if the page fault is handled successfully. + */ +void efi_recover_from_page_fault(unsigned long phys_addr) +{ + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* + * Make sure that an efi runtime service caused the page fault. + * "efi_mm" cannot be used to check if the page fault had occurred + * in the firmware context because efi=old_map doesn't use efi_pgd. + */ + if (efi_rts_work.efi_rts_id == NONE) + return; + + /* + * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so + * page faulting on these addresses isn't expected. + */ + if (phys_addr >= 0x0000 && phys_addr <= 0x0fff) + return; + + /* + * Print stack trace as it might be useful to know which EFI Runtime + * Service is buggy. + */ + WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n", + phys_addr); + + /* + * Buggy efi_reset_system() is handled differently from other EFI + * Runtime Services as it doesn't use efi_rts_wq. Although, + * native_machine_emergency_restart() says that machine_real_restart() + * could fail, it's better not to compilcate this fault handler + * because this case occurs *very* rarely and hence could be improved + * on a need by basis. + */ + if (efi_rts_work.efi_rts_id == RESET_SYSTEM) { + pr_info("efi_reset_system() buggy! Reboot through BIOS\n"); + machine_real_restart(MRR_BIOS); + return; + } + + /* + * Before calling EFI Runtime Service, the kernel has switched the + * calling process to efi_mm. Hence, switch back to task_mm. + */ + arch_efi_call_virt_teardown(); + + /* Signal error status to the efi caller process */ + efi_rts_work.status = EFI_ABORTED; + complete(&efi_rts_work.efi_rts_comp); + + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n"); + + /* + * Call schedule() in an infinite loop, so that any spurious wake ups + * will never run efi_rts_wq again. + */ + for (;;) { + set_current_state(TASK_IDLE); + schedule(); + } + + return; +} diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index b18b2d864c2c..a19d845bdb06 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -61,6 +61,11 @@ struct efi_runtime_work efi_rts_work; ({ \ efi_rts_work.status = EFI_ABORTED; \ \ + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \ + pr_warn_once("EFI Runtime Services are disabled!\n"); \ + goto exit; \ + } \ + \ init_completion(&efi_rts_work.efi_rts_comp); \ INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts); \ efi_rts_work.arg1 = _arg1; \ @@ -79,6 +84,8 @@ struct efi_runtime_work efi_rts_work; else \ pr_err("Failed to queue work to efi_rts_wq.\n"); \ \ +exit: \ + efi_rts_work.efi_rts_id = NONE; \ efi_rts_work.status; \ }) @@ -393,6 +400,7 @@ static void virt_efi_reset_system(int reset_type, "could not get exclusive access to the firmware\n"); return; } + efi_rts_work.efi_rts_id = RESET_SYSTEM; __efi_call_virt(reset_system, reset_type, status, data_size, data); up(&efi_runtime_lock); } diff --git a/include/linux/efi.h b/include/linux/efi.h index a929d2bf41fa..845174e113ce 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1662,8 +1662,13 @@ struct linux_efi_tpm_eventlog { extern int efi_tpm_eventlog_init(void); -/* efi_runtime_service() function identifiers */ +/* + * efi_runtime_service() function identifiers. + * "NONE" is used by efi_recover_from_page_fault() to check if the page + * fault happened while executing an efi runtime service. + */ enum efi_rts_ids { + NONE, GET_TIME, SET_TIME, GET_WAKEUP_TIME, @@ -1673,6 +1678,7 @@ enum efi_rts_ids { SET_VARIABLE, QUERY_VARIABLE_INFO, GET_NEXT_HIGH_MONO_COUNT, + RESET_SYSTEM, UPDATE_CAPSULE, QUERY_CAPSULE_CAPS, }; -- cgit