aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.yaml6
-rw-r--r--arch/arm64/include/asm/el2_setup.h6
-rw-r--r--arch/arm64/include/asm/kvm_arm.h6
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h71
-rw-r--r--arch/arm64/include/asm/kvm_host.h25
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h4
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h9
-rw-r--r--arch/arm64/kvm/arm.c76
-rw-r--r--arch/arm64/kvm/emulate-nested.c21
-rw-r--r--arch/arm64/kvm/fpsimd.c11
-rw-r--r--arch/arm64/kvm/guest.c3
-rw-r--r--arch/arm64/kvm/hyp/aarch32.c18
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h36
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/pkvm.h1
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c84
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c17
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c25
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c24
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c12
-rw-r--r--arch/arm64/kvm/nested.c6
-rw-r--r--arch/arm64/kvm/reset.c3
-rw-r--r--arch/loongarch/boot/dts/loongson-2k0500-ref.dts4
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000-ref.dts4
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000-ref.dts2
-rw-r--r--arch/loongarch/include/asm/numa.h1
-rw-r--r--arch/loongarch/include/asm/stackframe.h2
-rw-r--r--arch/loongarch/kernel/head.S2
-rw-r--r--arch/loongarch/kernel/setup.c6
-rw-r--r--arch/loongarch/kernel/smp.c5
-rw-r--r--arch/loongarch/kernel/vmlinux.lds.S10
-rw-r--r--arch/riscv/kvm/aia_device.c7
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c4
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/vmxfeatures.h2
-rw-r--r--arch/x86/kvm/Kconfig11
-rw-r--r--arch/x86/kvm/lapic.c39
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu/mmu.c46
-rw-r--r--arch/x86/kvm/mmu/spte.h9
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c2
-rw-r--r--arch/x86/kvm/svm/sev.c19
-rw-r--r--arch/x86/kvm/svm/svm.c69
-rw-r--r--arch/x86/kvm/svm/svm.h4
-rw-r--r--arch/x86/kvm/vmx/nested.c5
-rw-r--r--arch/x86/kvm/vmx/vmx.c11
-rw-r--r--arch/x86/kvm/x86.c11
-rw-r--r--drivers/char/tpm/tpm.h2
-rw-r--r--drivers/char/tpm/tpm_tis_core.c3
-rw-r--r--drivers/char/tpm/tpm_tis_core.h2
-rw-r--r--drivers/cxl/core/region.c18
-rw-r--r--drivers/firmware/efi/libstub/loongarch.c2
-rw-r--r--drivers/i2c/busses/i2c-synquacer.c11
-rw-r--r--drivers/of/irq.c125
-rw-r--r--drivers/of/of_private.h3
-rw-r--r--drivers/of/of_test.c1
-rw-r--r--drivers/of/property.c30
-rw-r--r--include/linux/i2c.h1
-rw-r--r--tools/testing/cxl/test/mem.c1
-rw-r--r--tools/testing/selftests/alsa/Makefile2
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c1
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c1
-rw-r--r--tools/testing/selftests/ftrace/config26
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc20
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc3
-rw-r--r--tools/testing/selftests/futex/Makefile2
-rw-r--r--tools/testing/selftests/futex/functional/Makefile2
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c2
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c1
72 files changed, 737 insertions, 277 deletions
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
index d2dce238ff5d..3e996346b264 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
@@ -54,11 +54,10 @@ unevaluatedProperties: false
examples:
- |
- mlahb: ahb@38000000 {
+ ahb {
compatible = "st,mlahb", "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
- reg = <0x10000000 0x40000>;
ranges;
dma-ranges = <0x00000000 0x38000000 0x10000>,
<0x10000000 0x10000000 0x60000>,
diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
index c6d0d8d81ed4..c2a158b75e49 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -57,17 +57,17 @@ properties:
- const: allwinner,sun8i-v3s
- description: Anbernic RG35XX (2024)
- - items:
+ items:
- const: anbernic,rg35xx-2024
- const: allwinner,sun50i-h700
- description: Anbernic RG35XX Plus
- - items:
+ items:
- const: anbernic,rg35xx-plus
- const: allwinner,sun50i-h700
- description: Anbernic RG35XX H
- - items:
+ items:
- const: anbernic,rg35xx-h
- const: allwinner,sun50i-h700
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index e4546b29dd0c..fd87c4b8f984 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -146,7 +146,7 @@
/* Coprocessor traps */
.macro __init_el2_cptr
__check_hvhe .LnVHE_\@, x1
- mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
+ mov x0, #CPACR_ELx_FPEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
.LnVHE_\@:
@@ -277,7 +277,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SVE traps
- orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+ orr x0, x0, #CPACR_ELx_ZEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
@@ -298,7 +298,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SME traps
- orr x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+ orr x0, x0, #CPACR_ELx_SMEN
msr cpacr_el1, x0
b .Lskip_set_cptr_sme_\@
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index e01bb5ca13b7..b2adc2c6c82a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -305,6 +305,12 @@
GENMASK(19, 14) | \
BIT(11))
+#define CPTR_VHE_EL2_RES0 (GENMASK(63, 32) | \
+ GENMASK(27, 26) | \
+ GENMASK(23, 22) | \
+ GENMASK(19, 18) | \
+ GENMASK(15, 0))
+
/* Hyp Debug Configuration Register bits */
#define MDCR_EL2_E2TB_MASK (UL(0x3))
#define MDCR_EL2_E2TB_SHIFT (UL(24))
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 501e3e019c93..21650e7924d4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -557,6 +557,68 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
+#define __build_check_all_or_none(r, bits) \
+ BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
+
+#define __cpacr_to_cptr_clr(clr, set) \
+ ({ \
+ u64 cptr = 0; \
+ \
+ if ((set) & CPACR_ELx_FPEN) \
+ cptr |= CPTR_EL2_TFP; \
+ if ((set) & CPACR_ELx_ZEN) \
+ cptr |= CPTR_EL2_TZ; \
+ if ((set) & CPACR_ELx_SMEN) \
+ cptr |= CPTR_EL2_TSM; \
+ if ((clr) & CPACR_ELx_TTA) \
+ cptr |= CPTR_EL2_TTA; \
+ if ((clr) & CPTR_EL2_TAM) \
+ cptr |= CPTR_EL2_TAM; \
+ if ((clr) & CPTR_EL2_TCPAC) \
+ cptr |= CPTR_EL2_TCPAC; \
+ \
+ cptr; \
+ })
+
+#define __cpacr_to_cptr_set(clr, set) \
+ ({ \
+ u64 cptr = 0; \
+ \
+ if ((clr) & CPACR_ELx_FPEN) \
+ cptr |= CPTR_EL2_TFP; \
+ if ((clr) & CPACR_ELx_ZEN) \
+ cptr |= CPTR_EL2_TZ; \
+ if ((clr) & CPACR_ELx_SMEN) \
+ cptr |= CPTR_EL2_TSM; \
+ if ((set) & CPACR_ELx_TTA) \
+ cptr |= CPTR_EL2_TTA; \
+ if ((set) & CPTR_EL2_TAM) \
+ cptr |= CPTR_EL2_TAM; \
+ if ((set) & CPTR_EL2_TCPAC) \
+ cptr |= CPTR_EL2_TCPAC; \
+ \
+ cptr; \
+ })
+
+#define cpacr_clear_set(clr, set) \
+ do { \
+ BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
+ BUILD_BUG_ON((clr) & CPACR_ELx_E0POE); \
+ __build_check_all_or_none((clr), CPACR_ELx_FPEN); \
+ __build_check_all_or_none((set), CPACR_ELx_FPEN); \
+ __build_check_all_or_none((clr), CPACR_ELx_ZEN); \
+ __build_check_all_or_none((set), CPACR_ELx_ZEN); \
+ __build_check_all_or_none((clr), CPACR_ELx_SMEN); \
+ __build_check_all_or_none((set), CPACR_ELx_SMEN); \
+ \
+ if (has_vhe() || has_hvhe()) \
+ sysreg_clear_set(cpacr_el1, clr, set); \
+ else \
+ sysreg_clear_set(cptr_el2, \
+ __cpacr_to_cptr_clr(clr, set), \
+ __cpacr_to_cptr_set(clr, set));\
+ } while (0)
+
static __always_inline void kvm_write_cptr_el2(u64 val)
{
if (has_vhe() || has_hvhe())
@@ -570,17 +632,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
u64 val;
if (has_vhe()) {
- val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
- CPACR_EL1_ZEN_EL1EN);
+ val = (CPACR_ELx_FPEN | CPACR_EL1_ZEN_EL1EN);
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN_EL1EN;
} else if (has_hvhe()) {
- val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+ val = CPACR_ELx_FPEN;
if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
- val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+ val |= CPACR_ELx_ZEN;
if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
+ val |= CPACR_ELx_SMEN;
} else {
val = CPTR_NVHE_EL2_RES1;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8170c04fde91..36b8e97bf49e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -76,6 +76,7 @@ static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int __ro_after_init kvm_sve_max_vl;
+extern unsigned int __ro_after_init kvm_host_sve_max_vl;
int __init kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
@@ -521,6 +522,20 @@ struct kvm_cpu_context {
u64 *vncr_array;
};
+struct cpu_sve_state {
+ __u64 zcr_el1;
+
+ /*
+ * Ordering is important since __sve_save_state/__sve_restore_state
+ * relies on it.
+ */
+ __u32 fpsr;
+ __u32 fpcr;
+
+ /* Must be SVE_VQ_BYTES (128 bit) aligned. */
+ __u8 sve_regs[];
+};
+
/*
* This structure is instantiated on a per-CPU basis, and contains
* data that is:
@@ -534,7 +549,15 @@ struct kvm_cpu_context {
*/
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
- struct user_fpsimd_state *fpsimd_state; /* hyp VA */
+
+ /*
+ * All pointers in this union are hyp VA.
+ * sve_state is only used in pKVM and if system_supports_sve().
+ */
+ union {
+ struct user_fpsimd_state *fpsimd_state;
+ struct cpu_sve_state *sve_state;
+ };
/* Ownership of the FP regs */
enum {
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 3e80464f8953..b05bceca3385 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -111,7 +111,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-void __sve_restore_state(void *sve_pffr, u32 *fpsr);
+void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr);
+void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr);
u64 __guest_enter(struct kvm_vcpu *vcpu);
@@ -142,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
extern unsigned long kvm_nvhe_sym(__icache_flags);
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index ad9cfb5c1ff4..cd56acd9a842 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}
+static inline size_t pkvm_host_sve_state_size(void)
+{
+ if (!system_supports_sve())
+ return 0;
+
+ return size_add(sizeof(struct cpu_sve_state),
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
+}
+
#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9996a989b52e..59716789fe0f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
return size ? get_order(size) : 0;
}
+static size_t pkvm_host_sve_state_order(void)
+{
+ return get_order(pkvm_host_sve_state_size());
+}
+
/* A lookup table holding the hypervisor VA for each vector slot */
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
@@ -2310,12 +2315,20 @@ static void __init teardown_subsystems(void)
static void __init teardown_hyp_mode(void)
{
+ bool free_sve = system_supports_sve() && is_protected_kvm_enabled();
int cpu;
free_hyp_pgds();
for_each_possible_cpu(cpu) {
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+ if (free_sve) {
+ struct cpu_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
+ }
}
}
@@ -2398,6 +2411,58 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
return 0;
}
+static int init_pkvm_host_sve_state(void)
+{
+ int cpu;
+
+ if (!system_supports_sve())
+ return 0;
+
+ /* Allocate pages for host sve state in protected mode. */
+ for_each_possible_cpu(cpu) {
+ struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
+
+ if (!page)
+ return -ENOMEM;
+
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page);
+ }
+
+ /*
+ * Don't map the pages in hyp since these are only used in protected
+ * mode, which will (re)create its own mapping when initialized.
+ */
+
+ return 0;
+}
+
+/*
+ * Finalizes the initialization of hyp mode, once everything else is initialized
+ * and the initialziation process cannot fail.
+ */
+static void finalize_init_hyp_mode(void)
+{
+ int cpu;
+
+ if (system_supports_sve() && is_protected_kvm_enabled()) {
+ for_each_possible_cpu(cpu) {
+ struct cpu_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
+ kern_hyp_va(sve_state);
+ }
+ } else {
+ for_each_possible_cpu(cpu) {
+ struct user_fpsimd_state *fpsimd_state;
+
+ fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
+ kern_hyp_va(fpsimd_state);
+ }
+ }
+}
+
static void pkvm_hyp_init_ptrauth(void)
{
struct kvm_cpu_context *hyp_ctxt;
@@ -2566,6 +2631,10 @@ static int __init init_hyp_mode(void)
goto out_err;
}
+ err = init_pkvm_host_sve_state();
+ if (err)
+ goto out_err;
+
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
@@ -2730,6 +2799,13 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;
+ /*
+ * This should be called after initialization is done and failure isn't
+ * possible anymore.
+ */
+ if (!in_hyp_mode)
+ finalize_init_hyp_mode();
+
kvm_arm_initialised = true;
return 0;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 72d733c74a38..54090967a335 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2181,16 +2181,23 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
if (forward_traps(vcpu, HCR_NV))
return;
+ spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
+ spsr = kvm_check_illegal_exception_return(vcpu, spsr);
+
/* Check for an ERETAx */
esr = kvm_vcpu_get_esr(vcpu);
if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) {
/*
- * Oh no, ERETAx failed to authenticate. If we have
- * FPACCOMBINE, deliver an exception right away. If we
- * don't, then let the mangled ELR value trickle down the
+ * Oh no, ERETAx failed to authenticate.
+ *
+ * If we have FPACCOMBINE and we don't have a pending
+ * Illegal Execution State exception (which has priority
+ * over FPAC), deliver an exception right away.
+ *
+ * Otherwise, let the mangled ELR value trickle down the
* ERET handling, and the guest will have a little surprise.
*/
- if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE)) {
+ if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE) && !(spsr & PSR_IL_BIT)) {
esr &= ESR_ELx_ERET_ISS_ERETA;
esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC);
kvm_inject_nested_sync(vcpu, esr);
@@ -2201,17 +2208,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_arch_vcpu_put(vcpu);
- spsr = __vcpu_sys_reg(vcpu, SPSR_EL2);
- spsr = kvm_check_illegal_exception_return(vcpu, spsr);
if (!esr_iss_is_eretax(esr))
elr = __vcpu_sys_reg(vcpu, ELR_EL2);
trace_kvm_nested_eret(vcpu, elr, spsr);
- /*
- * Note that the current exception level is always the virtual EL2,
- * since we set HCR_EL2.NV bit only when entering the virtual EL2.
- */
*vcpu_pc(vcpu) = elr;
*vcpu_cpsr(vcpu) = spsr;
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 1807d3a79a8a..521b32868d0d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -90,6 +90,13 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
fpsimd_save_and_flush_cpu_state();
}
}
+
+ /*
+ * If normal guests gain SME support, maintain this behavior for pKVM
+ * guests, which don't support SME.
+ */
+ WARN_ON(is_protected_kvm_enabled() && system_supports_sme() &&
+ read_sysreg_s(SYS_SVCR));
}
/*
@@ -161,9 +168,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
if (has_vhe() && system_supports_sme()) {
/* Also restore EL0 state seen on entry */
if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0,
- CPACR_EL1_SMEN_EL0EN |
- CPACR_EL1_SMEN_EL1EN);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
else
sysreg_clear_set(CPACR_EL1,
CPACR_EL1_SMEN_EL0EN,
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index e2f762d959bb..11098eb7eb44 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -251,6 +251,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case PSR_AA32_MODE_SVC:
case PSR_AA32_MODE_ABT:
case PSR_AA32_MODE_UND:
+ case PSR_AA32_MODE_SYS:
if (!vcpu_el1_is_32bit(vcpu))
return -EINVAL;
break;
@@ -276,7 +277,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
int i, nr_reg;
- switch (*vcpu_cpsr(vcpu)) {
+ switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
/*
* Either we are dealing with user mode, and only the
* first 15 registers (+ PC) must be narrowed to 32bit.
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index 8d9670e6615d..449fa58cf3b6 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -50,9 +50,23 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
u32 cpsr_cond;
int cond;
- /* Top two bits non-zero? Unconditional. */
- if (kvm_vcpu_get_esr(vcpu) >> 30)
+ /*
+ * These are the exception classes that could fire with a
+ * conditional instruction.
+ */
+ switch (kvm_vcpu_trap_get_class(vcpu)) {
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_LS:
+ case ESR_ELx_EC_FP_ASIMD:
+ case ESR_ELx_EC_CP10_ID:
+ case ESR_ELx_EC_CP14_64:
+ case ESR_ELx_EC_SVC32:
+ break;
+ default:
return true;
+ }
/* Is condition field valid? */
cond = kvm_vcpu_get_condition(vcpu);
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 61e6f3ba7b7d..e950875e31ce 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
+
+SYM_FUNC_START(__sve_save_state)
+ mov x2, #1
+ sve_save 0, x1, x2, 3
+ ret
+SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a92566f36022..0c4de44534b7 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -316,10 +316,24 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_restore_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.fp_regs.fpsr);
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ true);
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
+static inline void __hyp_sve_save_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+}
+
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
+
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
@@ -330,7 +344,6 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest;
u8 esr_ec;
- u64 reg;
if (!system_supports_fpsimd())
return false;
@@ -353,24 +366,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (has_vhe() || has_hvhe()) {
- reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
- if (sve_guest)
- reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
- sysreg_clear_set(cpacr_el1, 0, reg);
- } else {
- reg = CPTR_EL2_TFP;
- if (sve_guest)
- reg |= CPTR_EL2_TZ;
-
- sysreg_clear_set(cptr_el2, reg, 0);
- }
+ if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ else
+ cpacr_clear_set(0, CPACR_ELx_FPEN);
isb();
/* Write out the host state if it's in the registers */
if (host_owns_fp_regs())
- __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */
if (sve_guest)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 22f374e9f532..24a9a8330d19 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -59,7 +59,6 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
}
void pkvm_hyp_vm_table_init(void *tbl);
-void pkvm_host_fpsimd_state_init(void);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index d5c48dc98f67..f43d845f3c4e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -23,20 +23,80 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ /*
+ * On saving/restoring guest sve state, always use the maximum VL for
+ * the guest. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) guest VL.
+ */
+ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+ __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ /*
+ * On saving/restoring host sve state, always use the maximum VL for
+ * the host. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) host VL.
+ *
+ * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
+ * supported by the system (or limited at EL3).
+ */
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+ write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+ if (!guest_owns_fp_regs())
+ return;
+
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ isb();
+
+ if (vcpu_has_sve(vcpu))
+ __hyp_sve_save_guest(vcpu);
+ else
+ __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+ if (system_supports_sve())
+ __hyp_sve_restore_host();
+ else
+ __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+ fpsimd_sve_flush();
+
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
- hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
+ /* Limit guest vector length to the maximum supported by the host. */
+ hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
- hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
@@ -54,10 +114,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
unsigned int i;
+ fpsimd_sve_sync(&hyp_vcpu->vcpu);
+
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
- host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
@@ -79,6 +140,17 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
struct pkvm_hyp_vcpu *hyp_vcpu;
struct kvm *host_kvm;
+ /*
+ * KVM (and pKVM) doesn't support SME guests for now, and
+ * ensures that SME features aren't enabled in pstate when
+ * loading a vcpu. Therefore, if SME features enabled the host
+ * is misbehaving.
+ */
+ if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
host_kvm = kern_hyp_va(host_vcpu->kvm);
hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
host_vcpu->vcpu_idx);
@@ -405,11 +477,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_SVE:
- if (has_hvhe())
- sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN));
- else
- sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
+ cpacr_clear_set(0, CPACR_ELx_ZEN);
isb();
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
break;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 16aa4875ddb8..95cf18574251 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -18,6 +18,8 @@ unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;
+unsigned int kvm_host_sve_max_vl;
+
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
@@ -63,7 +65,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
/* Trap SVE */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
if (has_hvhe())
- cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ cptr_clear |= CPACR_ELx_ZEN;
else
cptr_set |= CPTR_EL2_TZ;
}
@@ -247,17 +249,6 @@ void pkvm_hyp_vm_table_init(void *tbl)
vm_table = tbl;
}
-void pkvm_host_fpsimd_state_init(void)
-{
- unsigned long i;
-
- for (i = 0; i < hyp_nr_cpus; i++) {
- struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
-
- host_data->fpsimd_state = &host_data->host_ctxt.fp_regs;
- }
-}
-
/*
* Return the hyp vm structure corresponding to the handle.
*/
@@ -586,6 +577,8 @@ unlock:
if (ret)
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+ hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+
return ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 859f22f754d3..f4350ba07b0b 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size)
return 0;
}
+static int pkvm_create_host_sve_mappings(void)
+{
+ void *start, *end;
+ int ret, i;
+
+ if (!system_supports_sve())
+ return 0;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+ struct cpu_sve_state *sve_state = host_data->sve_state;
+
+ start = kern_hyp_va(sve_state);
+ end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
+ ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
unsigned long *per_cpu_base,
u32 hyp_va_bits)
@@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
+ pkvm_create_host_sve_mappings();
+
/*
* Map the host sections RO in the hypervisor, but transfer the
* ownership from the host to the hypervisor itself to make sure they
@@ -300,7 +324,6 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
pkvm_hyp_vm_table_init(vm_table_base);
- pkvm_host_fpsimd_state_init();
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6758cd905570..6af179c6356d 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -48,15 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
if (cpus_have_final_cap(ARM64_SME)) {
if (has_hvhe())
- val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
+ val &= ~CPACR_ELx_SMEN;
else
val |= CPTR_EL2_TSM;
}
if (!guest_owns_fp_regs()) {
if (has_hvhe())
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
+ val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
else
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
@@ -182,6 +181,25 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Non-protected kvm relies on the host restoring its sve state.
+ * Protected kvm restores the host's sve state as not to reveal that
+ * fpsimd was used by a guest nor leak upper sve bits.
+ */
+ if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+ __hyp_sve_save_host();
+
+ /* Re-enable SVE traps if not supported for the guest vcpu. */
+ if (!vcpu_has_sve(vcpu))
+ cpacr_clear_set(CPACR_ELx_ZEN, 0);
+
+ } else {
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ }
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index d7af5f46f22a..8fbb6a2e0559 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -93,8 +93,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
- val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
+ val &= ~(CPACR_ELx_ZEN | CPACR_ELx_SMEN);
/*
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -109,9 +108,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ val |= CPACR_ELx_ZEN;
} else {
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+ val &= ~CPACR_ELx_FPEN;
__activate_traps_fpsimd32(vcpu);
}
@@ -262,6 +261,11 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 6813c7c7f00a..bae8536cbf00 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -58,8 +58,10 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
break;
case SYS_ID_AA64PFR1_EL1:
- /* Only support SSBS */
- val &= NV_FTR(PFR1, SSBS);
+ /* Only support BTI, SSBS, CSV2_frac */
+ val &= (NV_FTR(PFR1, BT) |
+ NV_FTR(PFR1, SSBS) |
+ NV_FTR(PFR1, CSV2_frac));
break;
case SYS_ID_AA64MMFR0_EL1:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 1b7b58cb121f..3fc8ca164dbe 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -32,6 +32,7 @@
/* Maximum phys_shift supported for any VM on this host */
static u32 __ro_after_init kvm_ipa_limit;
+unsigned int __ro_after_init kvm_host_sve_max_vl;
/*
* ARMv8 Reset Values
@@ -51,6 +52,8 @@ int __init kvm_arm_init_sve(void)
{
if (system_supports_sve()) {
kvm_sve_max_vl = sve_max_virtualisable_vl();
+ kvm_host_sve_max_vl = sve_max_vl();
+ kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
/*
* The get_sve_reg()/set_sve_reg() ioctl interface will need
diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
index 8aefb0c12672..a34734a6c3ce 100644
--- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
@@ -44,14 +44,14 @@
&gmac0 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
bus_id = <0x0>;
};
&gmac1 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
bus_id = <0x1>;
};
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
index 8463fe035386..23cf26cc3e5f 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -43,7 +43,7 @@
&gmac0 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-handle = <&phy0>;
mdio {
compatible = "snps,dwmac-mdio";
@@ -58,7 +58,7 @@
&gmac1 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-handle = <&phy1>;
mdio {
compatible = "snps,dwmac-mdio";
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
index 74b99bd234cc..ea9e6985d0e9 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -92,7 +92,7 @@
&gmac2 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-handle = <&phy2>;
mdio {
compatible = "snps,dwmac-mdio";
diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h
index 27f319b49862..b5f9de9f102e 100644
--- a/arch/loongarch/include/asm/numa.h
+++ b/arch/loongarch/include/asm/numa.h
@@ -56,6 +56,7 @@ extern int early_cpu_to_node(int cpu);
static inline void early_numa_add_cpu(int cpuid, s16 node) { }
static inline void numa_add_cpu(unsigned int cpu) { }
static inline void numa_remove_cpu(unsigned int cpu) { }
+static inline void set_cpuid_to_node(int cpuid, s16 node) { }
static inline int early_cpu_to_node(int cpu)
{
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 45b507a7b06f..d9eafd3ee3d1 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -42,7 +42,7 @@
.macro JUMP_VIRT_ADDR temp1 temp2
li.d \temp1, CACHE_BASE
pcaddi \temp2, 0
- or \temp1, \temp1, \temp2
+ bstrins.d \temp1, \temp2, (DMW_PABITS - 1), 0
jirl zero, \temp1, 0xc
.endm
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index c4f7de2e2805..4677ea8fa8e9 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -22,7 +22,7 @@
_head:
.word MZ_MAGIC /* "MZ", MS-DOS header */
.org 0x8
- .dword kernel_entry /* Kernel entry point */
+ .dword _kernel_entry /* Kernel entry point (physical address) */
.dword _kernel_asize /* Kernel image effective size */
.quad PHYS_LINK_KADDR /* Kernel image load offset from start of RAM */
.org 0x38 /* 0x20 ~ 0x37 reserved */
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 60e0fe97f61a..3d048f1be143 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -282,7 +282,7 @@ static void __init fdt_setup(void)
return;
/* Prefer to use built-in dtb, checking its legality first. */
- if (!fdt_check_header(__dtb_start))
+ if (IS_ENABLED(CONFIG_BUILTIN_DTB) && !fdt_check_header(__dtb_start))
fdt_pointer = __dtb_start;
else
fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */
@@ -351,10 +351,8 @@ void __init platform_init(void)
arch_reserve_vmcore();
arch_reserve_crashkernel();
-#ifdef CONFIG_ACPI_TABLE_UPGRADE
- acpi_table_upgrade();
-#endif
#ifdef CONFIG_ACPI
+ acpi_table_upgrade();
acpi_gbl_use_default_register_widths = false;
acpi_boot_table_init();
#endif
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 0dfe2388ef41..1436d2465939 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -273,7 +273,6 @@ static void __init fdt_smp_setup(void)
if (cpuid == loongson_sysconf.boot_cpu_id) {
cpu = 0;
- numa_add_cpu(cpu);
} else {
cpu = cpumask_next_zero(-1, cpu_present_mask);
}
@@ -283,6 +282,9 @@ static void __init fdt_smp_setup(void)
set_cpu_present(cpu, true);
__cpu_number_map[cpuid] = cpu;
__cpu_logical_map[cpu] = cpuid;
+
+ early_numa_add_cpu(cpu, 0);
+ set_cpuid_to_node(cpuid, 0);
}
loongson_sysconf.nr_cpus = num_processors;
@@ -468,6 +470,7 @@ void smp_prepare_boot_cpu(void)
set_cpu_possible(0, true);
set_cpu_online(0, true);
set_my_cpu_offset(per_cpu_offset(0));
+ numa_add_cpu(0);
rr_node = first_node(node_online_map);
for_each_possible_cpu(cpu) {
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index e8e97dbf9ca4..3c7595342730 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -6,6 +6,7 @@
#define PAGE_SIZE _PAGE_SIZE
#define RO_EXCEPTION_TABLE_ALIGN 4
+#define PHYSADDR_MASK 0xffffffffffff /* 48-bit */
/*
* Put .bss..swapper_pg_dir as the first thing in .bss. This will
@@ -142,10 +143,11 @@ SECTIONS
#ifdef CONFIG_EFI_STUB
/* header symbols */
- _kernel_asize = _end - _text;
- _kernel_fsize = _edata - _text;
- _kernel_vsize = _end - __initdata_begin;
- _kernel_rsize = _edata - __initdata_begin;
+ _kernel_entry = ABSOLUTE(kernel_entry & PHYSADDR_MASK);
+ _kernel_asize = ABSOLUTE(_end - _text);
+ _kernel_fsize = ABSOLUTE(_edata - _text);
+ _kernel_vsize = ABSOLUTE(_end - __initdata_begin);
+ _kernel_rsize = ABSOLUTE(_edata - __initdata_begin);
#endif
.gptab.sdata : {
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 0eb689351b7d..5cd407c6a8e4 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
{
- u32 hart, group = 0;
+ u32 hart = 0, group = 0;
- hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
- GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+ if (aia->nr_hart_bits)
+ hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+ GENMASK_ULL(aia->nr_hart_bits - 1, 0);
if (aia->nr_group_bits)
group = (addr >> aia->nr_group_shift) &
GENMASK_ULL(aia->nr_group_bits - 1, 0);
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index c676275ea0a0..62874fbca29f 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -724,9 +724,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
switch (reg_subtype) {
case KVM_REG_RISCV_ISA_SINGLE:
return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
- case KVM_REG_RISCV_SBI_MULTI_EN:
+ case KVM_REG_RISCV_ISA_MULTI_EN:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
- case KVM_REG_RISCV_SBI_MULTI_DIS:
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
default:
return -ENOENT;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ece45b3f6f20..f8ca74e7678f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2154,6 +2154,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 addr, unsigned long roots);
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index 266daf5b5b84..695f36664889 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -77,7 +77,7 @@
#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */
#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
-#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */
#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d64fb2b3eb69..fec95a770270 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -44,6 +44,7 @@ config KVM
select KVM_VFIO
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
+ select KVM_WERROR if WERROR
help
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
@@ -66,7 +67,7 @@ config KVM_WERROR
# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
# Building KVM with -Werror and KASAN is still doable via enabling
# the kernel-wide WERROR=y.
- depends on KVM && EXPERT && !KASAN
+ depends on KVM && ((EXPERT && !KASAN) || WERROR)
help
Add -Werror to the build flags for KVM.
@@ -97,15 +98,17 @@ config KVM_INTEL
config KVM_INTEL_PROVE_VE
bool "Check that guests do not receive #VE exceptions"
- default KVM_PROVE_MMU || DEBUG_KERNEL
- depends on KVM_INTEL
+ depends on KVM_INTEL && EXPERT
help
-
Checks that KVM's page table management code will not incorrectly
let guests receive a virtualization exception. Virtualization
exceptions will be trapped by the hypervisor rather than injected
in the guest.
+ Note: some CPUs appear to generate spurious EPT Violations #VEs
+ that trigger KVM's WARN, in particular with eptad=0 and/or nested
+ virtualization.
+
If unsure, say N.
config X86_SGX_KVM
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ebf41023be38..acd7d48100a1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -59,7 +59,17 @@
#define MAX_APIC_VECTOR 256
#define APIC_VECTORS_PER_REG 32
-static bool lapic_timer_advance_dynamic __read_mostly;
+/*
+ * Enable local APIC timer advancement (tscdeadline mode only) with adaptive
+ * tuning. When enabled, KVM programs the host timer event to fire early, i.e.
+ * before the deadline expires, to account for the delay between taking the
+ * VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
+ * the guest, i.e. so that the interrupt arrives in the guest with minimal
+ * latency relative to the deadline programmed by the guest.
+ */
+static bool lapic_timer_advance __read_mostly = true;
+module_param(lapic_timer_advance, bool, 0444);
+
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
@@ -1854,16 +1864,14 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
- if (lapic_timer_advance_dynamic) {
- adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
- /*
- * If the timer fired early, reread the TSC to account for the
- * overhead of the above adjustment to avoid waiting longer
- * than is necessary.
- */
- if (guest_tsc < tsc_deadline)
- guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- }
+ adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
+
+ /*
+ * If the timer fired early, reread the TSC to account for the overhead
+ * of the above adjustment to avoid waiting longer than is necessary.
+ */
+ if (guest_tsc < tsc_deadline)
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
@@ -2812,7 +2820,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
+int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic;
@@ -2845,13 +2853,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
- if (timer_advance_ns == -1) {
+ if (lapic_timer_advance)
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
- lapic_timer_advance_dynamic = true;
- } else {
- apic->lapic_timer.timer_advance_ns = timer_advance_ns;
- lapic_timer_advance_dynamic = false;
- }
/*
* Stuff the APIC ENABLE bit in lieu of temporarily incrementing
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0a0ea4b5dd8c..a69e706b9080 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -85,7 +85,7 @@ struct kvm_lapic {
struct dest_map;
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 662f62dfb2aa..8d74bdef68c1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -336,16 +336,19 @@ static int is_cpuid_PSE36(void)
#ifdef CONFIG_X86_64
static void __set_spte(u64 *sptep, u64 spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
WRITE_ONCE(*sptep, spte);
}
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
WRITE_ONCE(*sptep, spte);
}
static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
return xchg(sptep, spte);
}
@@ -4101,23 +4104,31 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
return leaf;
}
-/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
-static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level)
{
- u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
- struct rsvd_bits_validate *rsvd_check;
- int root, leaf, level;
- bool reserved = false;
+ int leaf;
walk_shadow_page_lockless_begin(vcpu);
if (is_tdp_mmu_active(vcpu))
- leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
+ leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root_level);
else
- leaf = get_walk(vcpu, addr, sptes, &root);
+ leaf = get_walk(vcpu, addr, sptes, root_level);
walk_shadow_page_lockless_end(vcpu);
+ return leaf;
+}
+
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
+static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+{
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+ struct rsvd_bits_validate *rsvd_check;
+ int root, leaf, level;
+ bool reserved = false;
+ leaf = get_sptes_lockless(vcpu, addr, sptes, &root);
if (unlikely(leaf < 0)) {
*sptep = 0ull;
return reserved;
@@ -4400,9 +4411,6 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
return RET_PF_EMULATE;
}
- fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
- smp_rmb();
-
/*
* Check for a relevant mmu_notifier invalidation event before getting
* the pfn from the primary MMU, and before acquiring mmu_lock.
@@ -5921,6 +5929,22 @@ emulate:
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg)
+{
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+ int root_level, leaf, level;
+
+ leaf = get_sptes_lockless(vcpu, gpa, sptes, &root_level);
+ if (unlikely(leaf < 0))
+ return;
+
+ pr_err("%s %llx", msg, gpa);
+ for (level = root_level; level >= leaf; level--)
+ pr_cont(", spte[%d] = 0x%llx", level, sptes[level]);
+ pr_cont("\n");
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes);
+
static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 addr, hpa_t root_hpa)
{
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 5dd5405fa07a..52fa004a1fbc 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -3,6 +3,8 @@
#ifndef KVM_X86_MMU_SPTE_H
#define KVM_X86_MMU_SPTE_H
+#include <asm/vmx.h>
+
#include "mmu.h"
#include "mmu_internal.h"
@@ -276,6 +278,13 @@ static inline bool is_shadow_present_pte(u64 pte)
return !!(pte & SPTE_MMU_PRESENT_MASK);
}
+static inline bool is_ept_ve_possible(u64 spte)
+{
+ return (shadow_present_mask & VMX_EPT_SUPPRESS_VE_BIT) &&
+ !(spte & VMX_EPT_SUPPRESS_VE_BIT) &&
+ (spte & VMX_EPT_RWX_MASK) != VMX_EPT_MISCONFIG_WX_VALUE;
+}
+
/*
* Returns true if A/D bits are supported in hardware and are enabled by KVM.
* When enabled, KVM uses A/D bits for all non-nested MMUs. Because L1 can
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index fae559559a80..2880fd392e0c 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -21,11 +21,13 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
return xchg(rcu_dereference(sptep), new_spte);
}
static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
WRITE_ONCE(*rcu_dereference(sptep), new_spte);
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 1259dd63defc..36539c1b36cd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -626,7 +626,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* SPTEs.
*/
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
- 0, iter->level, true);
+ SHADOW_NONPRESENT_VALUE, iter->level, true);
return 0;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0623cfaa7bb0..95095a233a45 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -779,6 +779,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
*/
fpstate_set_confidential(&vcpu->arch.guest_fpu);
vcpu->arch.guest_state_protected = true;
+
+ /*
+ * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
+ * only after setting guest_state_protected because KVM_SET_MSRS allows
+ * dynamic toggling of LBRV (for performance reason) on write access to
+ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+ */
+ svm_enable_lbrv(vcpu);
return 0;
}
@@ -2406,6 +2414,12 @@ void __init sev_hardware_setup(void)
if (!boot_cpu_has(X86_FEATURE_SEV_ES))
goto out;
+ if (!lbrv) {
+ WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
+ "LBRV must be present for SEV-ES support");
+ goto out;
+ }
+
/* Has the system been allocated ASIDs for SEV-ES? */
if (min_sev_asid == 1)
goto out;
@@ -3216,7 +3230,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
/*
* An SEV-ES guest requires a VMSA area that is a separate from the
@@ -3268,10 +3281,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
/* Clear intercepts on selected MSRs */
set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}
void sev_init_vmcb(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c8dc25886c16..296c524988f9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -99,6 +99,7 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
{ .index = MSR_IA32_PRED_CMD, .always = false },
{ .index = MSR_IA32_FLUSH_CMD, .always = false },
+ { .index = MSR_IA32_DEBUGCTLMSR, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -215,7 +216,7 @@ int vgif = true;
module_param(vgif, int, 0444);
/* enable/disable LBR virtualization */
-static int lbrv = true;
+int lbrv = true;
module_param(lbrv, int, 0444);
static int tsc_scaling = true;
@@ -990,7 +991,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
vmcb_mark_dirty(to_vmcb, VMCB_LBR);
}
-static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1000,6 +1001,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+ if (sev_es_guest(vcpu->kvm))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
+
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
if (is_guest_mode(vcpu))
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
@@ -1009,6 +1013,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
@@ -2822,10 +2828,24 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
return 0;
}
+static bool
+sev_es_prevent_msr_access(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ return sev_es_guest(vcpu->kvm) &&
+ vcpu->arch.guest_state_protected &&
+ svm_msrpm_offset(msr_info->index) != MSR_INVALID &&
+ !msr_write_intercepted(vcpu, msr_info->index);
+}
+
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (sev_es_prevent_msr_access(vcpu, msr_info)) {
+ msr_info->data = 0;
+ return -EINVAL;
+ }
+
switch (msr_info->index) {
case MSR_AMD64_TSC_RATIO:
if (!msr_info->host_initiated &&
@@ -2976,6 +2996,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
+
+ if (sev_es_prevent_msr_access(vcpu, msr))
+ return -EINVAL;
+
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
@@ -3846,16 +3870,27 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
/*
- * KVM should never request an NMI window when vNMI is enabled, as KVM
- * allows at most one to-be-injected NMI and one pending NMI, i.e. if
- * two NMIs arrive simultaneously, KVM will inject one and set
- * V_NMI_PENDING for the other. WARN, but continue with the standard
- * single-step approach to try and salvage the pending NMI.
+ * If NMIs are outright masked, i.e. the vCPU is already handling an
+ * NMI, and KVM has not yet intercepted an IRET, then there is nothing
+ * more to do at this time as KVM has already enabled IRET intercepts.
+ * If KVM has already intercepted IRET, then single-step over the IRET,
+ * as NMIs aren't architecturally unmasked until the IRET completes.
+ *
+ * If vNMI is enabled, KVM should never request an NMI window if NMIs
+ * are masked, as KVM allows at most one to-be-injected NMI and one
+ * pending NMI. If two NMIs arrive simultaneously, KVM will inject one
+ * NMI and set V_NMI_PENDING for the other, but if and only if NMIs are
+ * unmasked. KVM _will_ request an NMI window in some situations, e.g.
+ * if the vCPU is in an STI shadow or if GIF=0, KVM can't immediately
+ * inject the NMI. In those situations, KVM needs to single-step over
+ * the STI shadow or intercept STGI.
*/
- WARN_ON_ONCE(is_vnmi_enabled(svm));
+ if (svm_get_nmi_mask(vcpu)) {
+ WARN_ON_ONCE(is_vnmi_enabled(svm));
- if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
- return; /* IRET will cause a vm exit */
+ if (!svm->awaiting_iret_completion)
+ return; /* IRET will cause a vm exit */
+ }
/*
* SEV-ES guests are responsible for signaling when a vCPU is ready to
@@ -5265,6 +5300,12 @@ static __init int svm_hardware_setup(void)
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+ if (lbrv) {
+ if (!boot_cpu_has(X86_FEATURE_LBRV))
+ lbrv = false;
+ else
+ pr_info("LBR virtualization supported\n");
+ }
/*
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
@@ -5318,14 +5359,6 @@ static __init int svm_hardware_setup(void)
svm_x86_ops.set_vnmi_pending = NULL;
}
-
- if (lbrv) {
- if (!boot_cpu_has(X86_FEATURE_LBRV))
- lbrv = false;
- else
- pr_info("LBR virtualization supported\n");
- }
-
if (!enable_pmu)
pr_info("PMU virtualization is disabled\n");
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index be57213cd295..0f1472690b59 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 47
+#define MAX_DIRECT_ACCESS_MSRS 48
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -39,6 +39,7 @@ extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
extern bool vnmi;
+extern int lbrv;
/*
* Clean bits in VMCB.
@@ -552,6 +553,7 @@ u32 *svm_vcpu_alloc_msrpm(void);
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
void svm_vcpu_free_msrpm(u32 *msrpm);
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d5b832126e34..643935a0f70a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2242,6 +2242,9 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
vmcs_write64(EPT_POINTER,
construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
+ if (vmx->ve_info)
+ vmcs_write64(VE_INFORMATION_ADDRESS, __pa(vmx->ve_info));
+
/* All VMFUNCs are currently emulated through L0 vmexits. */
if (cpu_has_vmx_vmfunc())
vmcs_write64(VM_FUNCTION_CONTROL, 0);
@@ -6230,6 +6233,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
else if (is_alignment_check(intr_info) &&
!vmx_guest_inject_ac(vcpu))
return true;
+ else if (is_ve_fault(intr_info))
+ return true;
return false;
case EXIT_REASON_EXTERNAL_INTERRUPT:
return true;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6051fad5945f..b3c83c06f826 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5218,8 +5218,15 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
- if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm))
- return -EIO;
+ if (WARN_ON_ONCE(is_ve_fault(intr_info))) {
+ struct vmx_ve_information *ve_info = vmx->ve_info;
+
+ WARN_ONCE(ve_info->exit_reason != EXIT_REASON_EPT_VIOLATION,
+ "Unexpected #VE on VM-Exit reason 0x%x", ve_info->exit_reason);
+ dump_vmcs(vcpu);
+ kvm_mmu_print_sptes(vcpu, ve_info->guest_physical_address, "#VE");
+ return 1;
+ }
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 082ac6d95a3a..8c9e4281d978 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -164,15 +164,6 @@ module_param(kvmclock_periodic_sync, bool, 0444);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, 0644);
-/*
- * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
- * adaptive tuning starting from default advancement of 1000ns. '0' disables
- * advancement entirely. Any other value is used as-is and disables adaptive
- * tuning, i.e. allows privileged userspace to set an exact advancement time.
- */
-static int __read_mostly lapic_timer_advance_ns = -1;
-module_param(lapic_timer_advance_ns, int, 0644);
-
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, 0444);
@@ -12169,7 +12160,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (r < 0)
return r;
- r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+ r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 6b8b9956ba69..7bb87fa5f7a1 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -28,7 +28,7 @@
#include <linux/tpm_eventlog.h>
#ifdef CONFIG_X86
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#endif
#define TPM_MINOR 224 /* officially assigned */
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 176cd8dbf1db..fdef214b9f6b 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1020,7 +1020,8 @@ void tpm_tis_remove(struct tpm_chip *chip)
interrupt = 0;
tpm_tis_write32(priv, reg, ~TPM_GLOBAL_INT_ENABLE & interrupt);
- flush_work(&priv->free_irq_work);
+ if (priv->free_irq_work.func)
+ flush_work(&priv->free_irq_work);
tpm_tis_clkrun_enable(chip, false);
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 13e99cf65efe..690ad8e9b731 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -210,7 +210,7 @@ static inline int tpm_tis_verify_crc(struct tpm_tis_data *data, size_t len,
static inline bool is_bsw(void)
{
#ifdef CONFIG_X86
- return ((boot_cpu_data.x86_model == INTEL_FAM6_ATOM_AIRMONT) ? 1 : 0);
+ return (boot_cpu_data.x86_vfm == INTEL_ATOM_AIRMONT) ? 1 : 0;
#else
return false;
#endif
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 00a9f0eef8dd..3c2b6144be23 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2352,15 +2352,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
struct device *dev;
int rc;
- switch (mode) {
- case CXL_DECODER_RAM:
- case CXL_DECODER_PMEM:
- break;
- default:
- dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
- return ERR_PTR(-EINVAL);
- }
-
cxlr = cxl_region_alloc(cxlrd, id);
if (IS_ERR(cxlr))
return cxlr;
@@ -2415,6 +2406,15 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
{
int rc;
+ switch (mode) {
+ case CXL_DECODER_RAM:
+ case CXL_DECODER_PMEM:
+ break;
+ default:
+ dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
+ return ERR_PTR(-EINVAL);
+ }
+
rc = memregion_alloc(GFP_KERNEL);
if (rc < 0)
return ERR_PTR(rc);
diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c
index 684c9354637c..d0ef93551c44 100644
--- a/drivers/firmware/efi/libstub/loongarch.c
+++ b/drivers/firmware/efi/libstub/loongarch.c
@@ -41,7 +41,7 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv)
unsigned long __weak kernel_entry_address(unsigned long kernel_addr,
efi_loaded_image_t *image)
{
- return *(unsigned long *)(kernel_addr + 8) - VMLINUX_LOAD_ADDRESS + kernel_addr;
+ return *(unsigned long *)(kernel_addr + 8) - PHYSADDR(VMLINUX_LOAD_ADDRESS) + kernel_addr;
}
efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image,
diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c
index 31ecb2c7e978..4eccbcd0fbfc 100644
--- a/drivers/i2c/busses/i2c-synquacer.c
+++ b/drivers/i2c/busses/i2c-synquacer.c
@@ -138,7 +138,6 @@ struct synquacer_i2c {
int irq;
struct device *dev;
void __iomem *base;
- struct clk *pclk;
u32 pclkrate;
u32 speed_khz;
u32 timeout_ms;
@@ -535,6 +534,7 @@ static const struct i2c_adapter synquacer_i2c_ops = {
static int synquacer_i2c_probe(struct platform_device *pdev)
{
struct synquacer_i2c *i2c;
+ struct clk *pclk;
u32 bus_speed;
int ret;
@@ -550,13 +550,12 @@ static int synquacer_i2c_probe(struct platform_device *pdev)
device_property_read_u32(&pdev->dev, "socionext,pclk-rate",
&i2c->pclkrate);
- i2c->pclk = devm_clk_get_enabled(&pdev->dev, "pclk");
- if (IS_ERR(i2c->pclk))
- return dev_err_probe(&pdev->dev, PTR_ERR(i2c->pclk),
+ pclk = devm_clk_get_enabled(&pdev->dev, "pclk");
+ if (IS_ERR(pclk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(pclk),
"failed to get and enable clock\n");
- dev_dbg(&pdev->dev, "clock source %p\n", i2c->pclk);
- i2c->pclkrate = clk_get_rate(i2c->pclk);
+ i2c->pclkrate = clk_get_rate(pclk);
if (i2c->pclkrate < SYNQUACER_I2C_MIN_CLK_RATE ||
i2c->pclkrate > SYNQUACER_I2C_MAX_CLK_RATE)
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 174900072c18..462375b293e4 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -25,6 +25,8 @@
#include <linux/string.h>
#include <linux/slab.h>
+#include "of_private.h"
+
/**
* irq_of_parse_and_map - Parse and map an interrupt into linux virq space
* @dev: Device node of the device whose interrupt is to be mapped
@@ -96,6 +98,57 @@ static const char * const of_irq_imap_abusers[] = {
NULL,
};
+const __be32 *of_irq_parse_imap_parent(const __be32 *imap, int len, struct of_phandle_args *out_irq)
+{
+ u32 intsize, addrsize;
+ struct device_node *np;
+
+ /* Get the interrupt parent */
+ if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
+ np = of_node_get(of_irq_dflt_pic);
+ else
+ np = of_find_node_by_phandle(be32_to_cpup(imap));
+ imap++;
+
+ /* Check if not found */
+ if (!np) {
+ pr_debug(" -> imap parent not found !\n");
+ return NULL;
+ }
+
+ /* Get #interrupt-cells and #address-cells of new parent */
+ if (of_property_read_u32(np, "#interrupt-cells",
+ &intsize)) {
+ pr_debug(" -> parent lacks #interrupt-cells!\n");
+ of_node_put(np);
+ return NULL;
+ }
+ if (of_property_read_u32(np, "#address-cells",
+ &addrsize))
+ addrsize = 0;
+
+ pr_debug(" -> intsize=%d, addrsize=%d\n",
+ intsize, addrsize);
+
+ /* Check for malformed properties */
+ if (WARN_ON(addrsize + intsize > MAX_PHANDLE_ARGS)
+ || (len < (addrsize + intsize))) {
+ of_node_put(np);
+ return NULL;
+ }
+
+ pr_debug(" -> imaplen=%d\n", len);
+
+ imap += addrsize + intsize;
+
+ out_irq->np = np;
+ for (int i = 0; i < intsize; i++)
+ out_irq->args[i] = be32_to_cpup(imap - intsize + i);
+ out_irq->args_count = intsize;
+
+ return imap;
+}
+
/**
* of_irq_parse_raw - Low level interrupt tree parsing
* @addr: address specifier (start of "reg" property of the device) in be32 format
@@ -112,12 +165,12 @@ static const char * const of_irq_imap_abusers[] = {
*/
int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
{
- struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
+ struct device_node *ipar, *tnode, *old = NULL;
__be32 initial_match_array[MAX_PHANDLE_ARGS];
const __be32 *match_array = initial_match_array;
- const __be32 *tmp, *imap, *imask, dummy_imask[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(~0) };
- u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
- int imaplen, match, i, rc = -EINVAL;
+ const __be32 *tmp, dummy_imask[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(~0) };
+ u32 intsize = 1, addrsize;
+ int i, rc = -EINVAL;
#ifdef DEBUG
of_print_phandle_args("of_irq_parse_raw: ", out_irq);
@@ -176,6 +229,9 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
/* Now start the actual "proper" walk of the interrupt tree */
while (ipar != NULL) {
+ int imaplen, match;
+ const __be32 *imap, *oldimap, *imask;
+ struct device_node *newpar;
/*
* Now check if cursor is an interrupt-controller and
* if it is then we are done, unless there is an
@@ -216,7 +272,7 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
/* Parse interrupt-map */
match = 0;
- while (imaplen > (addrsize + intsize + 1) && !match) {
+ while (imaplen > (addrsize + intsize + 1)) {
/* Compare specifiers */
match = 1;
for (i = 0; i < (addrsize + intsize); i++, imaplen--)
@@ -224,48 +280,17 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
pr_debug(" -> match=%d (imaplen=%d)\n", match, imaplen);
- /* Get the interrupt parent */
- if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
- newpar = of_node_get(of_irq_dflt_pic);
- else
- newpar = of_find_node_by_phandle(be32_to_cpup(imap));
- imap++;
- --imaplen;
-
- /* Check if not found */
- if (newpar == NULL) {
- pr_debug(" -> imap parent not found !\n");
- goto fail;
- }
-
- if (!of_device_is_available(newpar))
- match = 0;
-
- /* Get #interrupt-cells and #address-cells of new
- * parent
- */
- if (of_property_read_u32(newpar, "#interrupt-cells",
- &newintsize)) {
- pr_debug(" -> parent lacks #interrupt-cells!\n");
- goto fail;
- }
- if (of_property_read_u32(newpar, "#address-cells",
- &newaddrsize))
- newaddrsize = 0;
-
- pr_debug(" -> newintsize=%d, newaddrsize=%d\n",
- newintsize, newaddrsize);
-
- /* Check for malformed properties */
- if (WARN_ON(newaddrsize + newintsize > MAX_PHANDLE_ARGS)
- || (imaplen < (newaddrsize + newintsize))) {
- rc = -EFAULT;
+ oldimap = imap;
+ imap = of_irq_parse_imap_parent(oldimap, imaplen, out_irq);
+ if (!imap)
goto fail;
- }
- imap += newaddrsize + newintsize;
- imaplen -= newaddrsize + newintsize;
+ match &= of_device_is_available(out_irq->np);
+ if (match)
+ break;
+ of_node_put(out_irq->np);
+ imaplen -= imap - oldimap;
pr_debug(" -> imaplen=%d\n", imaplen);
}
if (!match) {
@@ -287,11 +312,11 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
* Successfully parsed an interrupt-map translation; copy new
* interrupt specifier into the out_irq structure
*/
- match_array = imap - newaddrsize - newintsize;
- for (i = 0; i < newintsize; i++)
- out_irq->args[i] = be32_to_cpup(imap - newintsize + i);
- out_irq->args_count = intsize = newintsize;
- addrsize = newaddrsize;
+ match_array = oldimap + 1;
+
+ newpar = out_irq->np;
+ intsize = out_irq->args_count;
+ addrsize = (imap - match_array) - intsize;
if (ipar == newpar) {
pr_debug("%pOF interrupt-map entry to self\n", ipar);
@@ -300,7 +325,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
skiplevel:
/* Iterate again with new parent */
- out_irq->np = newpar;
pr_debug(" -> new parent: %pOF\n", newpar);
of_node_put(ipar);
ipar = newpar;
@@ -310,7 +334,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
fail:
of_node_put(ipar);
- of_node_put(newpar);
return rc;
}
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 94fc0aa07af9..04aa2a91f851 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -159,6 +159,9 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np,
extern int of_bus_n_addr_cells(struct device_node *np);
extern int of_bus_n_size_cells(struct device_node *np);
+const __be32 *of_irq_parse_imap_parent(const __be32 *imap, int len,
+ struct of_phandle_args *out_irq);
+
struct bus_dma_region;
#if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
int of_dma_get_range(struct device_node *np,
diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c
index a9301d293f01..c85a258bc6ae 100644
--- a/drivers/of/of_test.c
+++ b/drivers/of/of_test.c
@@ -54,4 +54,5 @@ static struct kunit_suite of_dtb_suite = {
kunit_test_suites(
&of_dtb_suite,
);
+MODULE_DESCRIPTION("KUnit tests for OF APIs");
MODULE_LICENSE("GPL");
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 1c83e68f805b..164d77cb9445 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1306,10 +1306,10 @@ static struct device_node *parse_interrupts(struct device_node *np,
static struct device_node *parse_interrupt_map(struct device_node *np,
const char *prop_name, int index)
{
- const __be32 *imap, *imap_end, *addr;
+ const __be32 *imap, *imap_end;
struct of_phandle_args sup_args;
u32 addrcells, intcells;
- int i, imaplen;
+ int imaplen;
if (!IS_ENABLED(CONFIG_OF_IRQ))
return NULL;
@@ -1322,33 +1322,23 @@ static struct device_node *parse_interrupt_map(struct device_node *np,
addrcells = of_bus_n_addr_cells(np);
imap = of_get_property(np, "interrupt-map", &imaplen);
- if (!imap || imaplen <= (addrcells + intcells))
+ imaplen /= sizeof(*imap);
+ if (!imap)
return NULL;
- imap_end = imap + imaplen;
- while (imap < imap_end) {
- addr = imap;
- imap += addrcells;
+ imap_end = imap + imaplen;
- sup_args.np = np;
- sup_args.args_count = intcells;
- for (i = 0; i < intcells; i++)
- sup_args.args[i] = be32_to_cpu(imap[i]);
- imap += intcells;
+ for (int i = 0; imap + addrcells + intcells + 1 < imap_end; i++) {
+ imap += addrcells + intcells;
- /*
- * Upon success, the function of_irq_parse_raw() returns
- * interrupt controller DT node pointer in sup_args.np.
- */
- if (of_irq_parse_raw(addr, &sup_args))
+ imap = of_irq_parse_imap_parent(imap, imap_end - imap, &sup_args);
+ if (!imap)
return NULL;
- if (!index)
+ if (i == index)
return sup_args.np;
of_node_put(sup_args.np);
- imap += sup_args.args_count + 1;
- index--;
}
return NULL;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 5e6cd43a6dbd..9709537370ee 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -852,7 +852,6 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap)
/* i2c adapter classes (bitmask) */
#define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */
-#define I2C_CLASS_SPD (1<<7) /* Memory modules */
/* Warn users that the adapter doesn't support classes anymore */
#define I2C_CLASS_DEPRECATED (1<<8)
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 6584443144de..eaf091a3d331 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -3,6 +3,7 @@
#include <linux/platform_device.h>
#include <linux/mod_devicetable.h>
+#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sizes.h>
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
index 5af9ba8a4645..c1ce39874e2b 100644
--- a/tools/testing/selftests/alsa/Makefile
+++ b/tools/testing/selftests/alsa/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
#
-CFLAGS += $(shell pkg-config --cflags alsa)
+CFLAGS += $(shell pkg-config --cflags alsa) $(KHDR_INCLUDES)
LDLIBS += $(shell pkg-config --libs alsa)
ifeq ($(LDLIBS),)
LDLIBS += -lasound
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index b171fd53b004..632ab44737ec 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <stdio.h>
#include <stdbool.h>
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index 759f86e7d263..2862aae58b79 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <inttypes.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index e59d985eeff0..048a312abf40 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -1,16 +1,28 @@
-CONFIG_KPROBES=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_EPROBE_EVENTS=y
+CONFIG_FPROBE=y
+CONFIG_FPROBE_EVENTS=y
CONFIG_FTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_PROFILER=y
-CONFIG_TRACER_SNAPSHOT=y
-CONFIG_STACK_TRACER=y
CONFIG_HIST_TRIGGERS=y
-CONFIG_SCHED_TRACER=y
-CONFIG_PREEMPT_TRACER=y
CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_PREEMPT_TRACER=y
+CONFIG_PROBE_EVENTS_BTF_ARGS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_TRACE_PRINTK=m
-CONFIG_KALLSYMS_ALL=y
+CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
index d3a79da215c8..5f72abe6fa79 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Generic dynamic event - check if duplicate events are caught
-# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README
+# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README events/syscalls/sys_enter_openat
echo 0 > events/enable
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 3f74c09c56b6..118247b8dd84 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -10,7 +10,6 @@ fail() { #msg
}
sample_events() {
- echo > trace
echo 1 > events/kmem/kmem_cache_free/enable
echo 1 > tracing_on
ls > /dev/null
@@ -22,6 +21,7 @@ echo 0 > tracing_on
echo 0 > events/enable
echo "Get the most frequently calling function"
+echo > trace
sample_events
target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
@@ -32,7 +32,16 @@ echo > trace
echo "Test event filter function name"
echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter
+
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+ exit_fail
+fi
+done
hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
@@ -49,7 +58,16 @@ address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1`
echo "Test event filter function address"
echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter
+echo > trace
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+ exit_fail
+fi
+done
hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 1f6981ef7afa..ba19b81cef39 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -30,7 +30,8 @@ find_dot_func() {
fi
grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do
- if grep -s $f available_filter_functions; then
+ cnt=`grep -s $f available_filter_functions | wc -l`;
+ if [ $cnt -eq 1 ]; then
echo $f
break
fi
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 11e157d7533b..78ab2cd111f6 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -3,8 +3,6 @@ SUBDIRS := functional
TEST_PROGS := run.sh
-.PHONY: all clean
-
include ../lib.mk
all:
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index a392d0917b4e..994fa3468f17 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
-CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) $(KHDR_INCLUDES)
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE= -pthread $(INCLUDES) $(KHDR_INCLUDES)
LDLIBS := -lpthread -lrt
LOCAL_HDRS := \
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 7f3ca5c78df1..215c6cb539b4 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -360,7 +360,7 @@ out:
int main(int argc, char *argv[])
{
- const char *test_name;
+ char *test_name;
int c, ret;
while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 9024754530b2..5790ab446527 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -5,6 +5,7 @@
*/
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <fcntl.h>
#include <sched.h>
#include <sys/stat.h>