diff options
author | Ingo Molnar <mingo@kernel.org> | 2017-06-08 10:12:12 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-06-08 10:12:12 +0200 |
commit | a5506c46a4d249a422b0eca537026fc7a1ac78b5 (patch) | |
tree | 6a463b0e6ac0ca16eb195a4f49a22afb1403337f /arch | |
parent | 36cc2b9222b5106de34085c4dd8635ac67ef5cba (diff) | |
parent | cc1582c231ea041fbc68861dfaf957eaf902b829 (diff) |
Merge branch 'perf/urgent' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
38 files changed, 378 insertions, 239 deletions
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index 9d5dc4fda3c1..3f7d1b74c5e0 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -17,14 +17,12 @@ @ there. .inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000 #else - mov r0, r0 + W(mov) r0, r0 #endif .endm .macro __EFI_HEADER #ifdef CONFIG_EFI_STUB - b __efi_start - .set start_offset, __efi_start - start .org start + 0x3c @ diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 7c711ba61417..8a756870c238 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -130,19 +130,22 @@ start: .rept 7 __nop .endr - ARM( mov r0, r0 ) - ARM( b 1f ) - THUMB( badr r12, 1f ) - THUMB( bx r12 ) +#ifndef CONFIG_THUMB2_KERNEL + mov r0, r0 +#else + AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode + M_CLASS( nop.w ) @ M: already in Thumb2 mode + .thumb +#endif + W(b) 1f .word _magic_sig @ Magic numbers to help the loader .word _magic_start @ absolute load/run zImage address .word _magic_end @ zImage end address .word 0x04030201 @ endianness flag - THUMB( .thumb ) -1: __EFI_HEADER - + __EFI_HEADER +1: ARM_BE8( setend be ) @ go BE8 if compiled for BE8 AR_CLASS( mrs r9, cpsr ) #ifdef CONFIG_ARM_VIRT_EXT diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index cf062472e07b..2b913f17d50f 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -235,7 +235,7 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster) return ret; } -typedef void (*phys_reset_t)(unsigned long); +typedef typeof(cpu_reset) phys_reset_t; void mcpm_cpu_power_down(void) { @@ -300,7 +300,7 @@ void mcpm_cpu_power_down(void) * on the CPU. */ phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); - phys_reset(__pa_symbol(mcpm_entry_point)); + phys_reset(__pa_symbol(mcpm_entry_point), false); /* should never get here */ BUG(); @@ -389,7 +389,7 @@ static int __init nocache_trampoline(unsigned long _arg) __mcpm_cpu_down(cpu, cluster); phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); - phys_reset(__pa_symbol(mcpm_entry_point)); + phys_reset(__pa_symbol(mcpm_entry_point), false); BUG(); } diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index 302240c19a5a..a0d726a47c8a 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -66,6 +66,7 @@ typedef pte_t *pte_addr_t; #define pgprot_noncached(prot) (prot) #define pgprot_writecombine(prot) (prot) #define pgprot_dmacoherent(prot) (prot) +#define pgprot_device(prot) (prot) /* diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 75bce2d0b1a8..49f6a6242cf9 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -81,6 +81,45 @@ }; }; + reg_sys_5v: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "SYS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + regulator-always-on; + }; + + reg_vdd_3v3: regulator@1 { + compatible = "regulator-fixed"; + regulator-name = "VDD_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + regulator-always-on; + vin-supply = <®_sys_5v>; + }; + + reg_5v_hub: regulator@2 { + compatible = "regulator-fixed"; + regulator-name = "5V_HUB"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + gpio = <&gpio0 7 0>; + regulator-always-on; + vin-supply = <®_sys_5v>; + }; + + wl1835_pwrseq: wl1835-pwrseq { + compatible = "mmc-pwrseq-simple"; + /* WLAN_EN GPIO */ + reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; + clocks = <&pmic>; + clock-names = "ext_clock"; + power-off-delay-us = <10>; + }; + soc { spi0: spi@f7106000 { status = "ok"; @@ -256,11 +295,31 @@ /* GPIO blocks 16 thru 19 do not appear to be routed to pins */ + dwmmc_0: dwmmc0@f723d000 { + cap-mmc-highspeed; + non-removable; + bus-width = <0x8>; + vmmc-supply = <&ldo19>; + }; + + dwmmc_1: dwmmc1@f723e000 { + card-detect-delay = <200>; + cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + vqmmc-supply = <&ldo7>; + vmmc-supply = <&ldo10>; + bus-width = <0x4>; + disable-wp; + cd-gpios = <&gpio1 0 1>; + }; + dwmmc_2: dwmmc2@f723f000 { - ti,non-removable; + bus-width = <0x4>; non-removable; - /* WL_EN */ - vmmc-supply = <&wlan_en_reg>; + vmmc-supply = <®_vdd_3v3>; + mmc-pwrseq = <&wl1835_pwrseq>; #address-cells = <0x1>; #size-cells = <0x0>; @@ -272,18 +331,6 @@ interrupts = <3 IRQ_TYPE_EDGE_RISING>; }; }; - - wlan_en_reg: regulator@1 { - compatible = "regulator-fixed"; - regulator-name = "wlan-en-regulator"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - /* WLAN_EN GPIO */ - gpio = <&gpio0 5 0>; - /* WLAN card specific delay */ - startup-delay-us = <70000>; - enable-active-high; - }; }; leds { @@ -330,6 +377,7 @@ pmic: pmic@f8000000 { compatible = "hisilicon,hi655x-pmic"; reg = <0x0 0xf8000000 0x0 0x1000>; + #clock-cells = <0>; interrupt-controller; #interrupt-cells = <2>; pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 1e5129b19280..5013e4b2ea71 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -725,20 +725,10 @@ status = "disabled"; }; - fixed_5v_hub: regulator@0 { - compatible = "regulator-fixed"; - regulator-name = "fixed_5v_hub"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - regulator-boot-on; - gpio = <&gpio0 7 0>; - regulator-always-on; - }; - usb_phy: usbphy { compatible = "hisilicon,hi6220-usb-phy"; #phy-cells = <0>; - phy-supply = <&fixed_5v_hub>; + phy-supply = <®_5v_hub>; hisilicon,peripheral-syscon = <&sys_ctrl>; }; @@ -766,17 +756,12 @@ dwmmc_0: dwmmc0@f723d000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - cap-mmc-highspeed; - non-removable; reg = <0x0 0xf723d000 0x0 0x1000>; interrupts = <0x0 0x48 0x4>; clocks = <&sys_ctrl 2>, <&sys_ctrl 1>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>; reset-names = "reset"; - bus-width = <0x8>; - vmmc-supply = <&ldo19>; pinctrl-names = "default"; pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func &emmc_cfg_func &emmc_rst_cfg_func>; @@ -784,13 +769,7 @@ dwmmc_1: dwmmc1@f723e000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - card-detect-delay = <200>; hisilicon,peripheral-syscon = <&ao_ctrl>; - cap-sd-highspeed; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; reg = <0x0 0xf723e000 0x0 0x1000>; interrupts = <0x0 0x49 0x4>; #address-cells = <0x1>; @@ -799,11 +778,6 @@ clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>; reset-names = "reset"; - vqmmc-supply = <&ldo7>; - vmmc-supply = <&ldo10>; - bus-width = <0x4>; - disable-wp; - cd-gpios = <&gpio1 0 1>; pinctrl-names = "default", "idle"; pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>; pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>; @@ -811,15 +785,12 @@ dwmmc_2: dwmmc2@f723f000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; reg = <0x0 0xf723f000 0x0 0x1000>; interrupts = <0x0 0x4a 0x4>; clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>; reset-names = "reset"; - bus-width = <0x4>; - broken-cd; pinctrl-names = "default", "idle"; pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>; pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>; diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 0e99978da3f0..59cca1d6ec54 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -23,9 +23,9 @@ #define ACPI_MADT_GICC_LENGTH \ (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) -#define BAD_MADT_GICC_ENTRY(entry, end) \ - (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ - (entry)->header.length != ACPI_MADT_GICC_LENGTH) +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \ + (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end)) /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..c7e3e6387a49 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); - if (!root_ops) + if (!root_ops) { + kfree(ri); return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); if (!ri->cfg) { diff --git a/arch/frv/include/asm/timex.h b/arch/frv/include/asm/timex.h index a89bddefdacf..139093fab326 100644 --- a/arch/frv/include/asm/timex.h +++ b/arch/frv/include/asm/timex.h @@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void) #define vxtime_lock() do {} while (0) #define vxtime_unlock() do {} while (0) +/* This attribute is used in include/linux/jiffies.h alongside with + * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp + * for frv does not contain another section specification. + */ +#define __jiffy_arch_data __attribute__((__section__(".data"))) + #endif diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 918d4c73e951..5351e1f3950d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -120,7 +120,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); unsigned long childksp; - p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index f8da545854f9..106859ae27ff 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -167,8 +167,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, top_of_kernel_stack = sp; - p->set_child_tid = p->clear_child_tid = NULL; - /* Locate userspace context on stack... */ sp -= STACK_FRAME_OVERHEAD; /* redzone */ sp -= sizeof(struct pt_regs); diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 3e7ce86d5c13..4d877144f377 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -46,6 +46,8 @@ #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */ #define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */ +#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */ +#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ /* * IMPORTANT! diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9b3e88b1a9c8..6f849832a669 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -124,7 +124,8 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER_POWER9 COMMON_USER_POWER8 #define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ PPC_FEATURE2_ARCH_3_00 | \ - PPC_FEATURE2_HAS_IEEE128) + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN ) #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 40c4887c27b6..f83056297441 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -161,7 +161,9 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, +#ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, +#endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 96c2b8a40630..0c45cdbac4cf 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -197,7 +197,9 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr); + ret = hash_page(ea, + _PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED, + 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 067defeea691..78fa9395b8c5 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -714,7 +714,7 @@ static void pnv_npu2_release_context(struct kref *kref) void pnv_npu2_destroy_context(struct npu_context *npu_context, struct pci_dev *gpdev) { - struct pnv_phb *nphb, *phb; + struct pnv_phb *nphb; struct npu *npu; struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); struct device_node *nvlink_dn; @@ -728,13 +728,12 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, nphb = pci_bus_to_host(npdev->bus)->private_data; npu = &nphb->npu; - phb = pci_bus_to_host(gpdev->bus)->private_data; nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", &nvlink_index))) return; npu_context->npdev[npu->index][nvlink_index] = NULL; - opal_npu_destroy_context(phb->opal_id, npu_context->mm->context.id, + opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); kref_put(&npu_context->kref, pnv_npu2_release_context); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cd18994a9555..4ccfacc7232a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -360,7 +360,7 @@ config SMP Management" code will be disabled if you say Y here. See also <file:Documentation/x86/i386/IO-APIC.txt>, - <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at + <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at <http://www.tldp.org/docs.html#howto>. If you don't know what to do here, say N. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5851411e60fb..bf240b920473 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -159,7 +159,7 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER # If '-Os' is enabled, disable it and print a warning. ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE undefine CONFIG_CC_OPTIMIZE_FOR_SIZE - $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) + $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) endif endif diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 44163e8c3868..2c860ad4fe06 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -94,7 +94,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - readelf -S $$obj | grep -qF .rel.local && { \ + ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 50bc26949e9e..48ef7bb32c42 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,6 +252,23 @@ ENTRY(__switch_to_asm) END(__switch_to_asm) /* + * The unwinder expects the last frame on the stack to always be at the same + * offset from the end of the page, which allows it to validate the stack. + * Calling schedule_tail() directly would break that convention because its an + * asmlinkage function so its argument has to be pushed on the stack. This + * wrapper creates a proper "end of stack" frame header before the call. + */ +ENTRY(schedule_tail_wrapper) + FRAME_BEGIN + + pushl %eax + call schedule_tail + popl %eax + + FRAME_END + ret +ENDPROC(schedule_tail_wrapper) +/* * A newly forked process directly context switches into this address. * * eax: prev task we switched from @@ -259,24 +276,15 @@ END(__switch_to_asm) * edi: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ - - /* - * schedule_tail() is asmlinkage so we have to put its 'prev' argument - * on the stack. - */ - pushl %eax - call schedule_tail - popl %eax + call schedule_tail_wrapper testl %ebx, %ebx jnz 1f /* kernel threads are uncommon */ 2: /* When we fork, we trace the syscall return in the child, too. */ - leal FRAME_OFFSET(%esp), %eax + movl %esp, %eax call syscall_return_slowpath - FRAME_END jmp restore_all /* kernel thread */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 607d72c4a485..4a4c0834f965 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,7 +36,6 @@ #include <asm/smap.h> #include <asm/pgtable_types.h> #include <asm/export.h> -#include <asm/frame.h> #include <linux/err.h> .code64 @@ -406,19 +405,17 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ + call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ 2: - leaq FRAME_OFFSET(%rsp),%rdi /* pt_regs pointer */ + movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS - FRAME_END jmp restore_regs_and_iret 1: diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4fd5195deed0..3f9a3d2a5209 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -266,6 +266,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s #endif int mce_available(struct cpuinfo_x86 *c); +bool mce_is_memory_error(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c5b8f760473c..32e14d137416 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -409,8 +409,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, memcpy(insnbuf, replacement, a->replacementlen); insnbuf_sz = a->replacementlen; - /* 0xe8 is a relative jump; fix the offset. */ - if (*insnbuf == 0xe8 && a->replacementlen == 5) { + /* + * 0xe8 is a relative jump; fix the offset. + * + * Instruction length is checked before the opcode to avoid + * accessing uninitialized bytes for zero-length replacements. + */ + if (a->replacementlen == 5 && *insnbuf == 0xe8) { *(s32 *)(insnbuf + 1) += replacement - instr; DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", *(s32 *)(insnbuf + 1), diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5abd4bf73d6e..5cfbaeb6529a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -499,16 +499,14 @@ static int mce_usable_address(struct mce *m) return 1; } -static bool memory_error(struct mce *m) +bool mce_is_memory_error(struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - if (c->x86_vendor == X86_VENDOR_AMD) { + if (m->cpuvendor == X86_VENDOR_AMD) { /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; return (xec == 0x0 || xec == 0x8); - } else if (c->x86_vendor == X86_VENDOR_INTEL) { + } else if (m->cpuvendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes * @@ -529,6 +527,7 @@ static bool memory_error(struct mce *m) return false; } +EXPORT_SYMBOL_GPL(mce_is_memory_error); static bool cec_add_mce(struct mce *m) { @@ -536,7 +535,7 @@ static bool cec_add_mce(struct mce *m) return false; /* We eat only correctable DRAM errors with usable addresses. */ - if (memory_error(m) && + if (mce_is_memory_error(m) && !(m->status & MCI_STATUS_UC) && mce_usable_address(m)) if (!cec_add_elem(m->addr >> PAGE_SHIFT)) @@ -713,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) + if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m)) if (m.status & MCI_STATUS_ADDRV) m.severity = severity; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 45db4d2ebd01..e9f4d762aa5b 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size); int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { @@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) if (!desc.mc) return -EINVAL; - ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), - desc.data, desc.size); + ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); if (ret != UCODE_OK) return -EINVAL; @@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { enum ucode_state ret; @@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) #ifdef CONFIG_X86_32 /* save BSP's matching patch for early load */ - if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { - struct ucode_patch *p = find_patch(cpu); + if (save) { + struct ucode_patch *p = find_patch(0); if (p) { memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), @@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, { char fw_name[36] = "amd-ucode/microcode_amd.bin"; struct cpuinfo_x86 *c = &cpu_data(cpu); + bool bsp = c->cpu_index == boot_cpu_data.cpu_index; enum ucode_state ret = UCODE_NFOUND; const struct firmware *fw; /* reload ucode container only on the boot cpu */ - if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + if (!refresh_fw || !bsp) return UCODE_OK; if (c->x86 >= 0x15) @@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); + ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); fw_release: release_firmware(fw); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0651e974dcb3..9bef1bbeba63 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -689,8 +689,12 @@ static inline void *alloc_tramp(unsigned long size) { return module_alloc(size); } -static inline void tramp_free(void *tramp) +static inline void tramp_free(void *tramp, int size) { + int npages = PAGE_ALIGN(size) >> PAGE_SHIFT; + + set_memory_nx((unsigned long)tramp, npages); + set_memory_rw((unsigned long)tramp, npages); module_memfree(tramp); } #else @@ -699,7 +703,7 @@ static inline void *alloc_tramp(unsigned long size) { return NULL; } -static inline void tramp_free(void *tramp) { } +static inline void tramp_free(void *tramp, int size) { } #endif /* Defined as markers to the end of the ftrace default trampolines */ @@ -771,7 +775,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* Copy ftrace_caller onto the trampoline memory */ ret = probe_kernel_read(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) { - tramp_free(trampoline); + tramp_free(trampoline, *tramp_size); return 0; } @@ -797,7 +801,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* Are we pointing to the reference? */ if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) { - tramp_free(trampoline); + tramp_free(trampoline, *tramp_size); return 0; } @@ -839,7 +843,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) unsigned long offset; unsigned long ip; unsigned int size; - int ret; + int ret, npages; if (ops->trampoline) { /* @@ -848,11 +852,14 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) */ if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; + npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT; + set_memory_rw(ops->trampoline, npages); } else { ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; ops->trampoline_size = size; + npages = PAGE_ALIGN(size) >> PAGE_SHIFT; } offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); @@ -863,6 +870,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); + set_memory_ro(ops->trampoline, npages); /* The update should never fail */ WARN_ON(ret); @@ -939,7 +947,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; - tramp_free((void *)ops->trampoline); + tramp_free((void *)ops->trampoline, ops->trampoline_size); ops->trampoline = 0; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 5b2bbfbb3712..6b877807598b 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include <linux/ftrace.h> #include <linux/frame.h> #include <linux/kasan.h> +#include <linux/moduleloader.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -417,6 +418,14 @@ static void prepare_boost(struct kprobe *p, struct insn *insn) } } +/* Recover page to RW mode before releasing it */ +void free_insn_page(void *page) +{ + set_memory_nx((unsigned long)page & PAGE_MASK, 1); + set_memory_rw((unsigned long)page & PAGE_MASK, 1); + module_memfree(page); +} + static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index ff40e74c9181..ffeae818aa7a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip); printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags, - smp_processor_id()); + raw_smp_processor_id()); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0b4d3c686b1e..f81823695014 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -980,8 +980,6 @@ void __init setup_arch(char **cmdline_p) */ x86_configure_nx(); - simple_udelay_calibration(); - parse_early_param(); #ifdef CONFIG_MEMORY_HOTPLUG @@ -1041,6 +1039,8 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); + simple_udelay_calibration(); + x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 82c6d7f1fd73..b9389d72b2f7 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -104,6 +104,11 @@ static inline unsigned long *last_frame(struct unwind_state *state) return (unsigned long *)task_pt_regs(state->task) - 2; } +static bool is_last_frame(struct unwind_state *state) +{ + return state->bp == last_frame(state); +} + #ifdef CONFIG_X86_32 #define GCC_REALIGN_WORDS 3 #else @@ -115,16 +120,15 @@ static inline unsigned long *last_aligned_frame(struct unwind_state *state) return last_frame(state) - GCC_REALIGN_WORDS; } -static bool is_last_task_frame(struct unwind_state *state) +static bool is_last_aligned_frame(struct unwind_state *state) { unsigned long *last_bp = last_frame(state); unsigned long *aligned_bp = last_aligned_frame(state); /* - * We have to check for the last task frame at two different locations - * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame in the prologue of a function - * called by head/entry code. Examples: + * GCC can occasionally decide to realign the stack pointer and change + * the offset of the stack frame in the prologue of a function called + * by head/entry code. Examples: * * <start_secondary>: * push %edi @@ -141,11 +145,38 @@ static bool is_last_task_frame(struct unwind_state *state) * push %rbp * mov %rsp,%rbp * - * Note that after aligning the stack, it pushes a duplicate copy of - * the return address before pushing the frame pointer. + * After aligning the stack, it pushes a duplicate copy of the return + * address before pushing the frame pointer. + */ + return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); +} + +static bool is_last_ftrace_frame(struct unwind_state *state) +{ + unsigned long *last_bp = last_frame(state); + unsigned long *last_ftrace_bp = last_bp - 3; + + /* + * When unwinding from an ftrace handler of a function called by entry + * code, the stack layout of the last frame is: + * + * bp + * parent ret addr + * bp + * function ret addr + * parent ret addr + * pt_regs + * ----------------- */ - return (state->bp == last_bp || - (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); + return (state->bp == last_ftrace_bp && + *state->bp == *(state->bp + 2) && + *(state->bp + 1) == *(state->bp + 4)); +} + +static bool is_last_task_frame(struct unwind_state *state) +{ + return is_last_frame(state) || is_last_aligned_frame(state) || + is_last_ftrace_frame(state); } /* diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c329d2894905..d24c8742d9b0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; + preempt_enable(); } static bool start_hv_timer(struct kvm_lapic *apic) @@ -1934,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) for (i = 0; i < KVM_APIC_LVT_NUM; i++) kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); apic_update_lvtt(apic); - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) + if (kvm_vcpu_is_reset_bsp(vcpu) && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) kvm_lapic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 183ddb235fb4..ba9891ac5c56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1807,7 +1807,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, * AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present" */ - var->unusable = !var->present || (var->type == 0); + var->unusable = !var->present; switch (seg) { case VCPU_SREG_TR: @@ -1840,6 +1840,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + /* This is symmetric with svm_set_segment() */ var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } @@ -1980,18 +1981,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->base = var->base; s->limit = var->limit; s->selector = var->selector; - if (var->unusable) - s->attrib = 0; - else { - s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); - s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; - s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; - s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; - s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; - s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; - s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; - s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; - } + s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); + s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; + s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; + s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; + s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; + s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; + s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; + s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; /* * This is always accurate, except if SYSRET returned to a segment @@ -2000,7 +1997,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, * would entail passing the CPL to userspace and back. */ if (seg == VCPU_SREG_SS) - svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + /* This is symmetric with svm_get_segment() */ + svm->vmcb->save.cpl = (var->dpl & 3); mark_dirty(svm->vmcb, VMCB_SEG); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 72f78396bc09..9b4b5d6dcd34 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6914,97 +6914,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, return 0; } -/* - * This function performs the various checks including - * - if it's 4KB aligned - * - No bits beyond the physical address width are set - * - Returns 0 on success or else 1 - * (Intel SDM Section 30.3) - */ -static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, - gpa_t *vmpointer) +static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) { gva_t gva; - gpa_t vmptr; struct x86_exception e; - struct page *page; - struct vcpu_vmx *vmx = to_vmx(vcpu); - int maxphyaddr = cpuid_maxphyaddr(vcpu); if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) return 1; - if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, - sizeof(vmptr), &e)) { + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer, + sizeof(*vmpointer), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } - switch (exit_reason) { - case EXIT_REASON_VMON: - /* - * SDM 3: 24.11.5 - * The first 4 bytes of VMXON region contain the supported - * VMCS revision identifier - * - * Note - IA32_VMX_BASIC[48] will never be 1 - * for the nested case; - * which replaces physical address width with 32 - * - */ - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - if (*(u32 *)kmap(page) != VMCS12_REVISION) { - kunmap(page); - nested_release_page_clean(page); - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - kunmap(page); - nested_release_page_clean(page); - vmx->nested.vmxon_ptr = vmptr; - break; - case EXIT_REASON_VMCLEAR: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - case EXIT_REASON_VMPTRLD: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - default: - return 1; /* shouldn't happen */ - } - - if (vmpointer) - *vmpointer = vmptr; return 0; } @@ -7066,6 +6990,8 @@ out_msr_bitmap: static int handle_vmon(struct kvm_vcpu *vcpu) { int ret; + gpa_t vmptr; + struct page *page; struct vcpu_vmx *vmx = to_vmx(vcpu); const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -7095,9 +7021,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - + + /* + * SDM 3: 24.11.5 + * The first 4 bytes of VMXON region contain the supported + * VMCS revision identifier + * + * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; + * which replaces physical address width with 32 + */ + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + + page = nested_get_page(vcpu, vmptr); + if (page == NULL) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + if (*(u32 *)kmap(page) != VMCS12_REVISION) { + kunmap(page); + nested_release_page_clean(page); + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + kunmap(page); + nested_release_page_clean(page); + + vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); if (ret) return ret; @@ -7213,9 +7167,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); @@ -7545,9 +7509,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmx->nested.current_vmptr != vmptr) { struct vmcs12 *new_vmcs12; struct page *page; @@ -7913,11 +7887,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; - int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_readl(vcpu, reg); + int reg; + unsigned long val; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + reg = (exit_qualification >> 8) & 15; + val = kvm_register_readl(vcpu, reg); switch (cr) { case 0: if (vmcs12->cr0_guest_host_mask & @@ -7972,6 +7948,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * lmsw can change bits 1..3 of cr0, and only set bit 0 of * cr0. Other attempted changes are ignored, with no exit. */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) return true; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02363e37d4a6..a2cd0997343c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8394,10 +8394,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (vcpu->arch.pv.pv_unhalted) return true; - if (atomic_read(&vcpu->arch.nmi_queued)) + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + (vcpu->arch.nmi_pending && + kvm_x86_ops->nmi_allowed(vcpu))) return true; - if (kvm_test_request(KVM_REQ_SMI, vcpu)) + if (kvm_test_request(KVM_REQ_SMI, vcpu) || + (vcpu->arch.smi_pending && !is_smm(vcpu))) return true; if (kvm_arch_interrupt_allowed(vcpu) && diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1dcd2be4cce4..c8520b2c62d2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -186,7 +186,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) unsigned int i, level; unsigned long addr; - BUG_ON(irqs_disabled()); + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); WARN_ON(PAGE_ALIGN(start) != start); on_each_cpu(__cpa_flush_range, NULL, 1); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7e76a4d8304b..43b96f5f78ba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI + * non-native EFI. With efi=old_map, we don't do runtime services in + * kexec kernel because in the initial boot something else might + * have been mapped at these virtual addresses. */ - if (!efi_is_native()) { + if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c488625c9712..eb8dff15a7f6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -71,11 +71,13 @@ static void __init early_code_mapping_set_exec(int executable) pgd_t * __init efi_call_phys_prolog(void) { - unsigned long vaddress; - pgd_t *save_pgd; + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; int pgd; - int n_pgds; + int n_pgds, i, j; if (!efi_enabled(EFI_OLD_MEMMAP)) { save_pgd = (pgd_t *)read_cr3(); @@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void) n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + /* + * Build 1:1 identity mapping for efi=old_map usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ for (pgd = 0; pgd < n_pgds; pgd++) { - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } } out: __flush_tlb_all(); @@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) /* * After the lock is released, the original page table is restored. */ - int pgd_idx; + int pgd_idx, i; int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; if (!efi_enabled(EFI_OLD_MEMMAP)) { write_cr3((unsigned long)save_pgd); @@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + if (!(pgd_val(*pgd) & _PAGE_PRESENT)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!(p4d_val(*p4d) & _PAGE_PRESENT)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + kfree(save_pgd); __flush_tlb_all(); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 26615991d69c..e0cf95a83f3f 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -360,6 +360,9 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + if (!num_entries) + return; + new_size = efi.memmap.desc_size * num_entries; new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { |