diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts | 2 | ||||
-rw-r--r-- | arch/arm/boot/dts/bcm63138.dtsi | 14 | ||||
-rw-r--r-- | arch/arm/boot/dts/stm32mp157c.dtsi | 4 | ||||
-rw-r--r-- | arch/arm/boot/dts/sun8i-r40.dtsi | 3 | ||||
-rw-r--r-- | arch/arm/mm/ioremap.c | 2 | ||||
-rw-r--r-- | arch/arm/tools/syscall.tbl | 1 | ||||
-rw-r--r-- | arch/arm64/kvm/guest.c | 55 | ||||
-rw-r--r-- | arch/arm64/mm/hugetlbpage.c | 50 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 10 | ||||
-rw-r--r-- | arch/riscv/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/x86/entry/vdso/Makefile | 16 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vclock_gettime.c | 26 | ||||
-rw-r--r-- | arch/x86/events/amd/uncore.c | 10 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 14 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/uv/uv.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 24 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 137 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 2 |
21 files changed, 284 insertions, 108 deletions
diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index b10dccd0958f..3b1baa8605a7 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -11,6 +11,7 @@ #include "sama5d2-pinfunc.h" #include <dt-bindings/mfd/atmel-flexcom.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/pinctrl/at91.h> / { model = "Atmel SAMA5D2 PTC EK"; @@ -299,6 +300,7 @@ <PIN_PA30__NWE_NANDWE>, <PIN_PB2__NRD_NANDOE>; bias-pull-up; + atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>; }; ale_cle_rdy_cs { diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf..6df61518776f 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_EDGE_RISING)>; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = <GIC_PPI 14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_LEVEL_HIGH)>; }; armpll: armpll { @@ -158,7 +160,7 @@ serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = <GIC_SPI 32 0>; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = <GIC_SPI 33 0>; + interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = <GIC_SPI 38 0>; + interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "nand"; }; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 661be948ab74..185541a5b69f 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -1078,8 +1078,8 @@ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = <&rcc SPI6_K>; resets = <&rcc SPI6_R>; - dmas = <&mdma1 34 0x0 0x40008 0x0 0x0 0>, - <&mdma1 35 0x0 0x40002 0x0 0x0 0>; + dmas = <&mdma1 34 0x0 0x40008 0x0 0x0>, + <&mdma1 35 0x0 0x40002 0x0 0x0>; dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index ffd9f00f74a4..5f547c161baf 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -800,8 +800,7 @@ }; hdmi_phy: hdmi-phy@1ef0000 { - compatible = "allwinner,sun8i-r40-hdmi-phy", - "allwinner,sun50i-a64-hdmi-phy"; + compatible = "allwinner,sun8i-r40-hdmi-phy"; reg = <0x01ef0000 0x10000>; clocks = <&ccu CLK_BUS_HDMI1>, <&ccu CLK_HDMI_SLOW>, <&ccu 7>, <&ccu 16>; diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46c..5bf9443cfbaa 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index fbc74b5fa3ed..8edf93b4490f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -413,3 +413,4 @@ 396 common pkey_free sys_pkey_free 397 common statx sys_statx 398 common rseq sys_rseq +399 common io_pgetevents sys_io_pgetevents diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..a6c9fbaeaefc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; @@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; + u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case PSR_AA32_MODE_FIQ: case PSR_AA32_MODE_IRQ: case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 192b3ba07075..f58ea503ad01 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) { @@ -320,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } +/* + * huge_ptep_set_access_flags will update access flags (dirty, accesssed) + * and write permission. + * + * For a contiguous huge pte range we need to check whether or not write + * permission has to change only on the first pte in the set. Then for + * all the contiguous ptes we need to check whether or not there is a + * discrepancy between dirty or young. + */ +static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) +{ + int i; + + if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + return 1; + + for (i = 0; i < ncontig; i++) { + pte_t orig_pte = huge_ptep_get(ptep + i); + + if (pte_dirty(pte) != pte_dirty(orig_pte)) + return 1; + + if (pte_young(pte) != pte_young(orig_pte)) + return 1; + } + + return 0; +} + int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i, changed = 0; + int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; @@ -336,19 +368,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; + if (!__cont_access_flags_changed(ptep, pte, ncontig)) + return 0; + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); - if (!pte_same(orig_pte, pte)) - changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); - return changed; + return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 933c574e1cf7..998f8d089ac7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -646,6 +646,16 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ local_irq_disable(); ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + /* + * If the PTE disappeared temporarily due to a THP + * collapse, just return and let the guest try again. + */ + if (!ptep) { + local_irq_enable(); + if (page) + put_page(page); + return RESUME_GUEST; + } pte = *ptep; local_irq_enable(); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index aee603123030..b2d26d9d8489 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -186,7 +186,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = pfn_base + PFN_DOWN(mem_size); + max_low_pfn = memblock_end_of_DRAM(); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fa3f439f0a92..141d415a8c80 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) @@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index f19856d95c60..e48ca3afa091 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -43,8 +43,9 @@ extern u8 hvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "=a" (ret), "=m" (*ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "=a" (ret), "=m" (*tv), "=m" (*tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 981ba5e8241b..8671de126eac 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -36,6 +36,7 @@ static int num_counters_llc; static int num_counters_nb; +static bool l3_mask; static HLIST_HEAD(uncore_unused_list); @@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; + /* + * SliceMask and ThreadMask need to be set for certain L3 events in + * Family 17h. For other events, the two fields do not affect the count. + */ + if (l3_mask) + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (event->cpu < 0) return -EINVAL; @@ -525,6 +533,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l3"; format_attr_event_df.show = &event_show_df; format_attr_event_l3.show = &event_show_l3; + l3_mask = true; } else { num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; @@ -532,6 +541,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l2"; format_attr_event_df = format_attr_event; format_attr_event_l3 = format_attr_event; + l3_mask = false; } amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 51d7c117e3c7..c07bee31abe8 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(0); + int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; @@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), }, { /* M3UPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), }, { /* M3UPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), }, { /* M3UPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), }, { /* end: all zeroes */ } }; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8..78241b736f2a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index a80c0673798f..e60c45fd3679 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,8 +10,13 @@ struct cpumask; struct mm_struct; #ifdef CONFIG_X86_UV +#include <linux/efi.h> extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); +} extern int is_uv_system(void); extern int is_uv_hubless(void); extern void uv_cpu_init(void); @@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, #else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22ab408177b2..eeea634bee0a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c) static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6490f618e096..b52bd2b6cdb4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -26,6 +26,7 @@ #include <asm/apic.h> #include <asm/intel-family.h> #include <asm/i8259.h> +#include <asm/uv/uv.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1433,6 +1434,9 @@ void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; + /* Don't change UV TSC multi-chassis synchronization */ + if (is_early_uv_system()) + return; if (!determine_cpu_tsc_frequencies(true)) return; loops_per_jiffy = get_loops_per_jiffy(); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d7e9bce6ff61..51b953ad9d4e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; */ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +/* + * In some cases, we need to preserve the GFN of a non-present or reserved + * SPTE when we usurp the upper five bits of the physical address space to + * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll + * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask + * left into the reserved bits, i.e. the GFN in the SPTE will be split into + * high and low parts. This mask covers the lower bits of the GFN. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + + static void mmu_spte_set(u64 *sptep, u64 spte); static union kvm_mmu_page_role kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); @@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | - shadow_nonpresent_or_rsvd_mask; - u64 gpa = spte & ~mask; + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) & shadow_nonpresent_or_rsvd_mask; @@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static void kvm_mmu_reset_all_pte_masks(void) { + u8 low_phys_bits; + shadow_user_mask = 0; shadow_accessed_mask = 0; shadow_dirty_mask = 0; @@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void) * appropriate mask to guard against L1TF attacks. Otherwise, it is * assumed that the CPU is not vulnerable to L1TF. */ + low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_data.x86_phys_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) + 52 - shadow_nonpresent_or_rsvd_mask_len) { shadow_nonpresent_or_rsvd_mask = rsvd_bits(boot_cpu_data.x86_phys_bits - shadow_nonpresent_or_rsvd_mask_len, boot_cpu_data.x86_phys_bits - 1); + low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; + } + shadow_nonpresent_or_rsvd_lower_gfn_mask = + GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); } static int is_cpuid_PSE36(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 06412ba46aa3..612fd17be635 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO); #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 -#define MSR_BITMAP_MODE_LM 4 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL @@ -857,6 +856,7 @@ struct nested_vmx { /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ u64 vmcs01_debugctl; + u64 vmcs01_guest_bndcfgs; u16 vpid02; u16 last_vpid; @@ -2899,8 +2899,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); } - if (is_long_mode(&vmx->vcpu)) - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #else savesegment(fs, fs_sel); savesegment(gs, gs_sel); @@ -2951,8 +2950,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) vmx->loaded_cpu_state = NULL; #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #endif if (host_state->ldt_sel || (host_state->gs_sel & 7)) { kvm_load_ldt(host_state->ldt_sel); @@ -2980,24 +2978,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - rdmsrl(MSR_KERNEL_GS_BASE, - vmx->msr_guest_kernel_gs_base); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + preempt_enable(); return vmx->msr_guest_kernel_gs_base; } static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); + preempt_enable(); vmx->msr_guest_kernel_gs_base = data; } #endif @@ -3533,9 +3526,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (kvm_mpx_supported()) - msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; @@ -3552,8 +3542,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_ENTRY_LOAD_IA32_PAT; msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (kvm_mpx_supported()) - msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; @@ -3601,12 +3589,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high); msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING; + /* * We can emulate "VMCS shadowing," even if the hardware * doesn't support it. @@ -3663,6 +3651,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (flexpriority_enabled) + msrs->secondary_ctls_high |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, msrs->misc_low, @@ -5073,19 +5065,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!msr) return; - /* - * MSR_KERNEL_GS_BASE is not intercepted when the guest is in - * 64-bit mode as a 64-bit kernel may frequently access the - * MSR. This means we need to manually save/restore the MSR - * when switching between guest and host state, but only if - * the guest is in 64-bit mode. Sync our cached value if the - * guest is transitioning to 32-bit mode and the CPU contains - * guest state, i.e. the cache is stale. - */ -#ifdef CONFIG_X86_64 - if (!(efer & EFER_LMA)) - (void)vmx_read_guest_kernel_gs_base(vmx); -#endif vcpu->arch.efer = efer; if (efer & EFER_LMA) { vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); @@ -6078,9 +6057,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) mode |= MSR_BITMAP_MODE_X2APIC_APICV; } - if (is_long_mode(vcpu)) - mode |= MSR_BITMAP_MODE_LM; - return mode; } @@ -6121,9 +6097,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) if (!changed) return; - vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, - !(mode & MSR_BITMAP_MODE_LM)); - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); @@ -6189,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) nested_mark_vmcs12_pages_dirty(vcpu); } +static u8 vmx_get_rvi(void) +{ + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; +} + static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6201,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) return false; - rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff; + rvi = vmx_get_rvi(); vapic_page = kmap(vmx->nested.virtual_apic_page); vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); @@ -10245,15 +10223,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + if (!flexpriority_enabled && + !cpu_has_vmx_virtualize_x2apic_mode()) + return; + /* Postpone execution until vmcs01 is the current VMCS. */ if (is_guest_mode(vcpu)) { to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; return; } - if (!cpu_need_tpr_shadow(vcpu)) - return; - sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); @@ -10375,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) +{ + u8 rvi = vmx_get_rvi(); + u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -11264,6 +11251,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) #undef cr4_fixed1_update } +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (kvm_mpx_supported()) { + bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); + + if (mpx_enabled) { + vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + } else { + vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; + } + } +} + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -11280,8 +11284,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (nested_vmx_allowed(vcpu)) + if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); + nested_vmx_entry_exit_ctls_update(vcpu); + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -12049,8 +12055,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) set_cr4_guest_host_mask(vmx); - if (vmx_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + if (kvm_mpx_supported()) { + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + else + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); + } if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) @@ -12595,15 +12606,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); bool from_vmentry = !!exit_qual; u32 dummy_exit_qual; - u32 vmcs01_cpu_exec_ctrl; + bool evaluate_pending_interrupts; int r = 0; - vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); + if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) + evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + if (kvm_mpx_supported() && + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); @@ -12643,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) * to L1 or delivered directly to L2 (e.g. In case L1 don't * intercept EXTERNAL_INTERRUPT). * - * Usually this would be handled by L0 requesting a - * IRQ/NMI window by setting VMCS accordingly. However, - * this setting was done on VMCS01 and now VMCS02 is active - * instead. Thus, we force L0 to perform pending event - * evaluation by requesting a KVM_REQ_EVENT. - */ - if (vmcs01_cpu_exec_ctrl & - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) { + * Usually this would be handled by the processor noticing an + * IRQ/NMI window request, or checking RVI during evaluation of + * pending virtual interrupts. However, this setting was done + * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 + * to perform pending event evaluation by requesting a KVM_REQ_EVENT. + */ + if (unlikely(evaluate_pending_interrupts)) kvm_make_request(KVM_REQ_EVENT, vcpu); - } /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edbf00ec56b3..ca717737347e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4698,7 +4698,7 @@ static void kvm_init_msr_list(void) */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) continue; break; case MSR_TSC_AUX: |