diff options
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/fault.h | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/switch.h | 173 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/ffa.h | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 23 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/gfp.h | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mm.h | 1 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/ffa.c | 13 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-init.S | 7 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 19 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mm.c | 87 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/page_alloc.c | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/pkvm.c | 10 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 29 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/switch.c | 4 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/tlb.c | 77 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 161 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/vhe/switch.c | 78 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 11 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/vhe/tlb.c | 59 |
21 files changed, 533 insertions, 245 deletions
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 9ddcfe2c3e57..9e13c1bc2ad5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) */ if (!(esr & ESR_ELx_S1PTW) && (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) { + esr_fsc_is_permission_fault(esr))) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 34f222af6165..a038320cdb08 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -30,6 +30,7 @@ #include <asm/fpsimd.h> #include <asm/debug-monitors.h> #include <asm/processor.h> +#include <asm/traps.h> struct kvm_exception_table_entry { int insn, fixup; @@ -70,20 +71,73 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } -static inline bool __hfgxtr_traps_required(void) -{ - if (cpus_have_final_cap(ARM64_SME)) - return true; +#define compute_clr_set(vcpu, reg, clr, set) \ + do { \ + u64 hfg; \ + hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \ + set |= hfg & __ ## reg ## _MASK; \ + clr |= ~hfg & __ ## reg ## _nMASK; \ + } while(0) - if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) - return true; +#define update_fgt_traps_cs(vcpu, reg, clr, set) \ + do { \ + struct kvm_cpu_context *hctxt = \ + &this_cpu_ptr(&kvm_host_data)->host_ctxt; \ + u64 c = 0, s = 0; \ + \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + compute_clr_set(vcpu, reg, c, s); \ + s |= set; \ + c |= clr; \ + if (c || s) { \ + u64 val = __ ## reg ## _nMASK; \ + val |= s; \ + val &= ~c; \ + write_sysreg_s(val, SYS_ ## reg); \ + } \ + } while(0) + +#define update_fgt_traps(vcpu, reg) \ + update_fgt_traps_cs(vcpu, reg, 0, 0) - return false; +/* + * Validate the fine grain trap masks. + * Check that the masks do not overlap and that all bits are accounted for. + */ +#define CHECK_FGT_MASKS(reg) \ + do { \ + BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \ + BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \ + (__ ## reg ## _nMASK))); \ + } while(0) + +static inline bool cpu_has_amu(void) +{ + u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + + return cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_EL1_AMU_SHIFT); } -static inline void __activate_traps_hfgxtr(void) +static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; + u64 r_val, w_val; + + CHECK_FGT_MASKS(HFGRTR_EL2); + CHECK_FGT_MASKS(HFGWTR_EL2); + CHECK_FGT_MASKS(HFGITR_EL2); + CHECK_FGT_MASKS(HDFGRTR_EL2); + CHECK_FGT_MASKS(HDFGWTR_EL2); + CHECK_FGT_MASKS(HAFGRTR_EL2); + CHECK_FGT_MASKS(HCRX_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; + + ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2); + ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2); if (cpus_have_final_cap(ARM64_SME)) { tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK; @@ -98,26 +152,56 @@ static inline void __activate_traps_hfgxtr(void) if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) w_set |= HFGxTR_EL2_TCR_EL1_MASK; - sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set); - sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set); + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set); + compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set); + } + + /* The default to trap everything not handled or supported in KVM. */ + tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 | + HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1; + + r_val = __HFGRTR_EL2_nMASK & ~tmp; + r_val |= r_set; + r_val &= ~r_clr; + + w_val = __HFGWTR_EL2_nMASK & ~tmp; + w_val |= w_set; + w_val &= ~w_clr; + + write_sysreg_s(r_val, SYS_HFGRTR_EL2); + write_sysreg_s(w_val, SYS_HFGWTR_EL2); + + if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + return; + + update_fgt_traps(vcpu, HFGITR_EL2); + update_fgt_traps(vcpu, HDFGRTR_EL2); + update_fgt_traps(vcpu, HDFGWTR_EL2); + + if (cpu_has_amu()) + update_fgt_traps(vcpu, HAFGRTR_EL2); } -static inline void __deactivate_traps_hfgxtr(void) +static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; + struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; - if (cpus_have_final_cap(ARM64_SME)) { - tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK; + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; - r_set |= tmp; - w_set |= tmp; - } + write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2); - if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) - w_clr |= HFGxTR_EL2_TCR_EL1_MASK; + if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + return; - sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set); - sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set); + write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2); + + if (cpu_has_amu()) + write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2); } static inline void __activate_traps_common(struct kvm_vcpu *vcpu) @@ -145,8 +229,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); - if (__hfgxtr_traps_required()) - __activate_traps_hfgxtr(); + if (cpus_have_final_cap(ARM64_HAS_HCX)) { + u64 hcrx = HCRX_GUEST_FLAGS; + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + u64 clr = 0, set = 0; + + compute_clr_set(vcpu, HCRX_EL2, clr, set); + + hcrx |= set; + hcrx &= ~clr; + } + + write_sysreg_s(hcrx, SYS_HCRX_EL2); + } + + __activate_traps_hfgxtr(vcpu); } static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) @@ -162,8 +259,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } - if (__hfgxtr_traps_required()) - __deactivate_traps_hfgxtr(); + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + + __deactivate_traps_hfgxtr(vcpu); } static inline void ___activate_traps(struct kvm_vcpu *vcpu) @@ -177,9 +276,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2); } static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) @@ -194,9 +290,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~HCR_VSE; vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; } - - if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); } static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) @@ -204,6 +297,22 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } +static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + + /* + * Finish potential single step before executing the prologue + * instruction. + */ + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); + + return true; +} + static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) { sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); @@ -513,7 +622,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { bool valid; - valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT && + valid = kvm_vcpu_trap_is_translation_fault(vcpu) && kvm_vcpu_dabt_isvalid(vcpu) && !kvm_vcpu_abt_issea(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h index 1becb10ecd80..d9fd5e6c7d3c 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h +++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h @@ -12,6 +12,6 @@ #define FFA_MAX_FUNC_NUM 0x7F int hyp_ffa_init(void *pages); -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #endif /* __KVM_HYP_FFA_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 37440e1dda93..51f043649146 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -69,6 +69,8 @@ ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \ ) +#define PVM_ID_AA64PFR2_ALLOW 0ULL + /* * Allow for protected VMs: * - Mixed-endian @@ -101,6 +103,7 @@ * - Privileged Access Never * - SError interrupt exceptions from speculative reads * - Enhanced Translation Synchronization + * - Control for cache maintenance permission */ #define PVM_ID_AA64MMFR1_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \ @@ -108,7 +111,8 @@ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \ ) /* @@ -133,6 +137,8 @@ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \ ) +#define PVM_ID_AA64MMFR3_ALLOW (0ULL) + /* * No support for Scalable Vectors for protected VMs: * Requires additional support from KVM, e.g., context-switching and @@ -178,10 +184,18 @@ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \ ) +/* Restrict pointer authentication to the basic version. */ +#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \ + ) + +#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \ + ) + #define PVM_ID_AA64ISAR1_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ @@ -196,8 +210,9 @@ ) #define PVM_ID_AA64ISAR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \ ) u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index fe5472a184a3..97c527ef53c2 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -16,7 +16,7 @@ struct hyp_pool { * API at EL2. */ hyp_spinlock_t lock; - struct list_head free_area[MAX_ORDER + 1]; + struct list_head free_area[NR_PAGE_ORDERS]; phys_addr_t range_start; phys_addr_t range_end; unsigned short max_order; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index d5ec972b5c1e..230e4f2527de 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot, unsigned long *haddr); +int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr); int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index ab4f5d160c58..320f2eaa14a9 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -423,6 +423,7 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, DECLARE_REG(u32, fraglen, ctxt, 2); DECLARE_REG(u64, addr_mbz, ctxt, 3); DECLARE_REG(u32, npages_mbz, ctxt, 4); + struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; struct ffa_mem_region *buf; u32 offset, nr_ranges; @@ -452,7 +453,9 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, buf = hyp_buffers.tx; memcpy(buf, host_buffers.tx, fraglen); - offset = buf->ep_mem_access[0].composite_off; + ep_mem_access = (void *)buf + + ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + offset = ep_mem_access->composite_off; if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) { ret = FFA_RET_INVALID_PARAMETERS; goto out_unlock; @@ -504,6 +507,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, DECLARE_REG(u32, handle_lo, ctxt, 1); DECLARE_REG(u32, handle_hi, ctxt, 2); DECLARE_REG(u32, flags, ctxt, 3); + struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; u32 offset, len, fraglen, fragoff; struct ffa_mem_region *buf; @@ -528,7 +532,9 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, len = res->a1; fraglen = res->a2; - offset = buf->ep_mem_access[0].composite_off; + ep_mem_access = (void *)buf + + ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + offset = ep_mem_access->composite_off; /* * We can trust the SPMD to get this right, but let's at least * check that we end up with something that doesn't look _completely_ @@ -634,9 +640,8 @@ out_handled: return true; } -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); struct arm_smccc_res res; /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 90fade1b032e..2994878d68ea 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,6 +57,7 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc + bic x0, x0, #ARM_SMCCC_CALL_HINTS mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) cmp x0, x3 b.eq 1f @@ -121,11 +122,7 @@ alternative_if ARM64_HAS_CNP alternative_else_nop_endif msr ttbr0_el2, x2 - /* - * Set the PS bits in TCR_EL2. - */ ldr x0, [x0, #NVHE_INIT_TCR_EL2] - tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 msr tcr_el2, x0 isb @@ -291,6 +288,8 @@ alternative_else_nop_endif mov sp, x0 /* And turn the MMU back on! */ + dsb nsh + isb set_sctlr_el2 x2 ret x1 SYM_FUNC_END(__pkvm_init_switch_pgd) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a169c619db60..2385fd03ed87 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level); } +static void +handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + DECLARE_REG(phys_addr_t, start, host_ctxt, 2); + DECLARE_REG(unsigned long, pages, host_ctxt, 3); + + __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages); +} + static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); @@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh), HANDLE_FUNC(__kvm_tlb_flush_vmid), + HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__vgic_v3_read_vmcr), @@ -357,6 +368,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) if (static_branch_unlikely(&kvm_protected_mode_initialized)) hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; + id &= ~ARM_SMCCC_CALL_HINTS; id -= KVM_HOST_SMCCC_ID(0); if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall))) @@ -381,11 +393,14 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { + DECLARE_REG(u64, func_id, host_ctxt, 0); bool handled; - handled = kvm_host_psci_handler(host_ctxt); + func_id &= ~ARM_SMCCC_CALL_HINTS; + + handled = kvm_host_psci_handler(host_ctxt, func_id); if (!handled) - handled = kvm_host_ffa_handler(host_ctxt); + handled = kvm_host_ffa_handler(host_ctxt, func_id); if (!handled) default_host_smc_handler(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 9d703441278b..861c76021a25 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -91,7 +91,7 @@ static void host_s2_put_page(void *addr) hyp_put_page(&host_s2_pool, addr); } -static void host_s2_free_unlinked_table(void *addr, u32 level) +static void host_s2_free_unlinked_table(void *addr, s8 level) { kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level); } @@ -129,8 +129,8 @@ static void prepare_host_vtcr(void) parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, - id_aa64mmfr1_el1_sys_val, phys_shift); + host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, + id_aa64mmfr1_el1_sys_val, phys_shift); } static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); @@ -235,7 +235,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) unsigned long nr_pages; int ret; - nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT; ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); if (ret) return ret; @@ -295,7 +295,7 @@ int __pkvm_prot_finalize(void) return -EPERM; params->vttbr = kvm_get_vttbr(mmu); - params->vtcr = host_mmu.arch.vtcr; + params->vtcr = mmu->vtcr; params->hcr_el2 |= HCR_VM; /* @@ -443,7 +443,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; - u32 level; + s8 level; int ret; hyp_assert_lock_held(&host_mmu.lock); @@ -462,7 +462,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; level++; - } while ((level < KVM_PGTABLE_MAX_LEVELS) && + } while ((level <= KVM_PGTABLE_LAST_LEVEL) && !(kvm_level_supports_block_mapping(level) && range_included(&cur, range))); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 318298eb3d6b..b01a3d1078a8 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, return err; } +static int __pkvm_alloc_private_va_range(unsigned long start, size_t size) +{ + unsigned long cur; + + hyp_assert_lock_held(&pkvm_pgd_lock); + + if (!start || start < __io_map_base) + return -EINVAL; + + /* The allocated size is always a multiple of PAGE_SIZE */ + cur = start + PAGE_ALIGN(size); + + /* Are we overflowing on the vmemmap ? */ + if (cur > __hyp_vmemmap) + return -ENOMEM; + + __io_map_base = cur; + + return 0; +} + /** * pkvm_alloc_private_va_range - Allocates a private VA range. * @size: The size of the VA range to reserve. @@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, */ int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) { - unsigned long base, addr; - int ret = 0; + unsigned long addr; + int ret; hyp_spin_lock(&pkvm_pgd_lock); - - /* Align the allocation based on the order of its size */ - addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); - - /* The allocated size is always a multiple of PAGE_SIZE */ - base = addr + PAGE_ALIGN(size); - - /* Are we overflowing on the vmemmap ? */ - if (!addr || base > __hyp_vmemmap) - ret = -ENOMEM; - else { - __io_map_base = base; - *haddr = addr; - } - + addr = __io_map_base; + ret = __pkvm_alloc_private_va_range(addr, size); hyp_spin_unlock(&pkvm_pgd_lock); + *haddr = addr; + return ret; } @@ -250,7 +260,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03 */ dsb(ishst); - __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1)); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL); dsb(ish); isb(); } @@ -265,7 +275,7 @@ static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx, { struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg); - if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1) + if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL) return -EINVAL; slot->addr = ctx->addr; @@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits) return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); } +int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr) +{ + unsigned long addr, prev_base; + size_t size; + int ret; + + hyp_spin_lock(&pkvm_pgd_lock); + + prev_base = __io_map_base; + /* + * Efficient stack verification using the PAGE_SHIFT bit implies + * an alignment of our allocation on the order of the size. + */ + size = PAGE_SIZE * 2; + addr = ALIGN(__io_map_base, size); + + ret = __pkvm_alloc_private_va_range(addr, size); + if (!ret) { + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, + PAGE_SIZE, phys, PAGE_HYP); + if (ret) + __io_map_base = prev_base; + } + hyp_spin_unlock(&pkvm_pgd_lock); + + *haddr = addr + size; + + return ret; +} + static void *admit_host_page(void *arg) { struct kvm_hyp_memcache *host_mc = arg; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index b1e392186a0f..e691290d3765 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -228,7 +228,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, int i; hyp_spin_lock_init(&pool->lock); - pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT)); + pool->max_order = min(MAX_PAGE_ORDER, + get_order(nr_pages << PAGE_SHIFT)); for (i = 0; i <= pool->max_order; i++) INIT_LIST_HEAD(&pool->free_area[i]); pool->range_start = phys; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 8033ef353a5d..26dd9a20ad6e 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -12,7 +12,7 @@ #include <nvhe/pkvm.h> #include <nvhe/trap_handler.h> -/* Used by icache_is_vpipt(). */ +/* Used by icache_is_aliasing(). */ unsigned long __icache_flags; /* Used by kvm_get_vttbr(). */ @@ -136,6 +136,10 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) cptr_set |= CPTR_EL2_TTA; } + /* Trap External Trace */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) + mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + vcpu->arch.mdcr_el2 |= mdcr_set; vcpu->arch.mdcr_el2 &= ~mdcr_clear; vcpu->arch.cptr_el2 |= cptr_set; @@ -303,7 +307,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, { hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; - hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr; + hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; } static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, @@ -483,7 +487,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, } vm_size = pkvm_get_hyp_vm_size(nr_vcpus); - pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr); + pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr); ret = -ENOMEM; diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 24543d2a3490..d57bcb6ab94d 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -273,9 +273,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ } } -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); unsigned long ret; switch (kvm_host_psci_config.version) { diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bb98630dfeaf..bc58d1b515af 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, for (i = 0; i < hyp_nr_cpus; i++) { struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i); - unsigned long hyp_addr; start = (void *)kern_hyp_va(per_cpu_base[i]); end = start + PAGE_ALIGN(hyp_percpu_size); @@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - /* - * Allocate a contiguous HYP private VA range for the stack - * and guard page. The allocation is also aligned based on - * the order of its size. - */ - ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + ret = pkvm_create_stack(params->stack_pa, ¶ms->stack_hyp_va); if (ret) return ret; - - /* - * Since the stack grows downwards, map the stack to the page - * at the higher address and leave the lower guard page - * unbacked. - * - * Any valid stack address now has the PAGE_SHIFT bit as 1 - * and addresses corresponding to the guard page have the - * PAGE_SHIFT bit as 0 - this is used for overflow detection. - */ - hyp_spin_lock(&pkvm_pgd_lock); - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, - PAGE_SIZE, params->stack_pa, PAGE_HYP); - hyp_spin_unlock(&pkvm_pgd_lock); - if (ret) - return ret; - - /* Update stack_hyp_va to end of the stack's private VA range */ - params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } /* @@ -206,7 +181,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, if (!kvm_pte_valid(ctx->old)) return 0; - if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) + if (ctx->level != KVM_PGTABLE_LAST_LEVEL) return -EINVAL; phys = kvm_pte_to_phys(ctx->old); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e89a23153e85..c50f8459e4fc 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -192,6 +192,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, + [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; static const exit_handler_fn pvm_exit_handlers[] = { @@ -203,6 +204,7 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, + [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) @@ -236,7 +238,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) * KVM_ARM_VCPU_INIT, however, this is likely not possible for * protected VMs. */ - vcpu->arch.target = -1; + vcpu_clear_flag(vcpu, VCPU_INITIALIZED); *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); *exit_code |= ARM_EXCEPTION_IL; } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index b9991bbd8e3f..a60fb13e2192 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -105,28 +105,6 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - /* - * If the host is running at EL1 and we have a VPIPT I-cache, - * then we must perform I-cache maintenance at EL2 in order for - * it to have an effect on the guest. Since the guest cannot hit - * I-cache lines allocated with a different VMID, we don't need - * to worry about junk out of guest reset (we nuke the I-cache on - * VMID rollover), but we do need to be careful when remapping - * executable pages for the same guest. This can happen when KSM - * takes a CoW fault on an executable page, copies the page into - * a page that was previously mapped in the guest and then needs - * to invalidate the guest view of the I-cache for that page - * from EL1. To solve this, we invalidate the entire I-cache when - * unmapping a page from a guest if we have a VPIPT I-cache but - * the host is running at EL1. As above, we could do better if - * we had the VA. - * - * The moral of this story is: if you have a VPIPT I-cache, then - * you should be running with VHE enabled. - */ - if (icache_is_vpipt()) - icache_inval_all_pou(); - __tlb_switch_to_host(&cxt); } @@ -157,27 +135,31 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nsh); isb(); + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages) +{ + struct tlb_inv_context cxt; + unsigned long stride; + /* - * If the host is running at EL1 and we have a VPIPT I-cache, - * then we must perform I-cache maintenance at EL2 in order for - * it to have an effect on the guest. Since the guest cannot hit - * I-cache lines allocated with a different VMID, we don't need - * to worry about junk out of guest reset (we nuke the I-cache on - * VMID rollover), but we do need to be careful when remapping - * executable pages for the same guest. This can happen when KSM - * takes a CoW fault on an executable page, copies the page into - * a page that was previously mapped in the guest and then needs - * to invalidate the guest view of the I-cache for that page - * from EL1. To solve this, we invalidate the entire I-cache when - * unmapping a page from a guest if we have a VPIPT I-cache but - * the host is running at EL1. As above, we could do better if - * we had the VA. - * - * The moral of this story is: if you have a VPIPT I-cache, then - * you should be running with VHE enabled. + * Since the range of addresses may not be mapped at + * the same level, assume the worst case as PAGE_SIZE */ - if (icache_is_vpipt()) - icache_inval_all_pou(); + stride = PAGE_SIZE; + start = round_down(start, stride); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt, false); + + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); __tlb_switch_to_host(&cxt); } @@ -216,18 +198,5 @@ void __kvm_flush_vm_context(void) /* Same remark as in __tlb_switch_to_guest() */ dsb(ish); __tlbi(alle1is); - - /* - * VIPT and PIPT caches are not affected by VMID, so no maintenance - * is necessary across a VMID rollover. - * - * VPIPT caches constrain lookup and maintenance to the active VMID, - * so we need to invalidate lines with a stale VMID to avoid an ABA - * race after multiple rollovers. - * - */ - if (icache_is_vpipt()) - asm volatile("ic ialluis"); - dsb(ish); } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f7a93ef29250..c651df904fe3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -79,7 +79,10 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx) static bool kvm_phys_is_valid(u64 phys) { - return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); + u64 parange_max = kvm_get_parange_max(); + u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max); + + return phys < BIT(shift); } static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) @@ -98,7 +101,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, return IS_ALIGNED(ctx->addr, granule); } -static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) +static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level) { u64 shift = kvm_granule_shift(level); u64 mask = BIT(PAGE_SHIFT - 3) - 1; @@ -114,7 +117,7 @@ static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) return (addr & mask) >> shift; } -static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) +static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level) { struct kvm_pgtable pgt = { .ia_bits = ia_bits, @@ -124,9 +127,9 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) return kvm_pgd_page_idx(&pgt, -1ULL) + 1; } -static bool kvm_pte_table(kvm_pte_t pte, u32 level) +static bool kvm_pte_table(kvm_pte_t pte, s8 level) { - if (level == KVM_PGTABLE_MAX_LEVELS - 1) + if (level == KVM_PGTABLE_LAST_LEVEL) return false; if (!kvm_pte_valid(pte)) @@ -154,11 +157,11 @@ static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops return pte; } -static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) +static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level) { kvm_pte_t pte = kvm_phys_to_pte(pa); - u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE : - KVM_PTE_TYPE_BLOCK; + u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE : + KVM_PTE_TYPE_BLOCK; pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI); pte |= FIELD_PREP(KVM_PTE_TYPE, type); @@ -203,11 +206,11 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker, } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level); + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level); static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, struct kvm_pgtable_mm_ops *mm_ops, - kvm_pteref_t pteref, u32 level) + kvm_pteref_t pteref, s8 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref); @@ -272,12 +275,13 @@ out: } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level) { u32 idx; int ret = 0; - if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS)) + if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL || + level > KVM_PGTABLE_LAST_LEVEL)) return -EINVAL; for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { @@ -340,7 +344,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct leaf_walk_data { kvm_pte_t pte; - u32 level; + s8 level; }; static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, @@ -355,7 +359,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, } int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, - kvm_pte_t *ptep, u32 *level) + kvm_pte_t *ptep, s8 *level) { struct leaf_walk_data data; struct kvm_pgtable_walker walker = { @@ -401,14 +405,15 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) if (device) return -EINVAL; - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti()) + if (system_supports_bti_kernel()) attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP; } else { attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; } attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); - attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); + if (!kvm_lpa2_is_enabled()) + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; @@ -467,7 +472,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, if (hyp_map_walker_try_leaf(ctx, data)) return 0; - if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL)) return -EINVAL; childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); @@ -563,14 +568,19 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, struct kvm_pgtable_mm_ops *mm_ops) { - u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); + s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 - + ARM64_HW_PGTABLE_LEVELS(va_bits); + + if (start_level < KVM_PGTABLE_FIRST_LEVEL || + start_level > KVM_PGTABLE_LAST_LEVEL) + return -EINVAL; pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL); if (!pgt->pgd) return -ENOMEM; pgt->ia_bits = va_bits; - pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; + pgt->start_level = start_level; pgt->mm_ops = mm_ops; pgt->mmu = NULL; pgt->force_pte_cb = NULL; @@ -624,7 +634,7 @@ struct stage2_map_data { u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) { u64 vtcr = VTCR_EL2_FLAGS; - u8 lvls; + s8 lvls; vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; vtcr |= VTCR_EL2_T0SZ(phys_shift); @@ -635,6 +645,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) lvls = stage2_pgtable_levels(phys_shift); if (lvls < 2) lvls = 2; + + /* + * When LPA2 is enabled, the HW supports an extra level of translation + * (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2 + * to as an addition to SL0 to enable encoding this extra start level. + * However, since we always use concatenated pages for the first level + * lookup, we will never need this extra level and therefore do not need + * to touch SL2. + */ vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); #ifdef CONFIG_ARM64_HW_AFDBM @@ -654,6 +673,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */ + if (kvm_lpa2_is_enabled()) + vtcr |= VTCR_EL2_DS; + /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS_16BIT : @@ -664,12 +686,32 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) static bool stage2_has_fwb(struct kvm_pgtable *pgt) { - if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) return false; return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); } +void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t addr, size_t size) +{ + unsigned long pages, inval_pages; + + if (!system_supports_tlb_range()) { + kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + return; + } + + pages = size >> PAGE_SHIFT; + while (pages > 0) { + inval_pages = min(pages, MAX_TLBI_RANGE_PAGES); + kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages); + + addr += inval_pages << PAGE_SHIFT; + pages -= inval_pages; + } +} + #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, @@ -691,7 +733,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p if (prot & KVM_PGTABLE_PROT_W) attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; - attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); + if (!kvm_lpa2_is_enabled()) + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); + attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; @@ -786,7 +830,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * evicted pte value (if any). */ if (kvm_pte_table(ctx->old, ctx->level)) - kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + kvm_tlb_flush_vmid_range(mmu, ctx->addr, + kvm_granule_size(ctx->level)); else if (kvm_pte_valid(ctx->old)) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); @@ -810,16 +855,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n smp_store_release(ctx->ptep, new); } -static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, - struct kvm_pgtable_mm_ops *mm_ops) +static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt) +{ + /* + * If FEAT_TLBIRANGE is implemented, defer the individual + * TLB invalidations until the entire walk is finished, and + * then use the range-based TLBI instructions to do the + * invalidations. Condition deferred TLB invalidation on the + * system supporting FWB as the optimization is entirely + * pointless when the unmap walker needs to perform CMOs. + */ + return system_supports_tlb_range() && stage2_has_fwb(pgt); +} + +static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, + struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) { + struct kvm_pgtable *pgt = ctx->arg; + /* - * Clear the existing PTE, and perform break-before-make with - * TLB maintenance if it was valid. + * Clear the existing PTE, and perform break-before-make if it was + * valid. Depending on the system support, defer the TLB maintenance + * for the same until the entire unmap walk is completed. */ if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + + if (!stage2_unmap_defer_tlb_flush(pgt)) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, + ctx->addr, ctx->level); } mm_ops->put_page(ctx->ptep); @@ -861,7 +926,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, { u64 phys = stage2_map_walker_phys_addr(ctx, data); - if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) + if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL) return false; return kvm_block_mapping_supported(ctx, phys); @@ -940,7 +1005,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (ret != -E2BIG) return ret; - if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL)) return -EINVAL; if (!data->memcache) @@ -1077,7 +1142,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, * block entry and rely on the remaining portions being faulted * back lazily. */ - stage2_put_pte(ctx, mmu, mm_ops); + stage2_unmap_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), @@ -1091,20 +1156,26 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { + int ret; struct kvm_pgtable_walker walker = { .cb = stage2_unmap_walker, .arg = pgt, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - return kvm_pgtable_walk(pgt, addr, size, &walker); + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + if (stage2_unmap_defer_tlb_flush(pgt)) + /* Perform the deferred TLB invalidations */ + kvm_tlb_flush_vmid_range(pgt->mmu, addr, size); + + return ret; } struct stage2_attr_data { kvm_pte_t attr_set; kvm_pte_t attr_clr; kvm_pte_t pte; - u32 level; + s8 level; }; static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, @@ -1147,7 +1218,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, u64 size, kvm_pte_t attr_set, kvm_pte_t attr_clr, kvm_pte_t *orig_pte, - u32 *level, enum kvm_pgtable_walk_flags flags) + s8 *level, enum kvm_pgtable_walk_flags flags) { int ret; kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; @@ -1249,7 +1320,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot) { int ret; - u32 level; + s8 level; kvm_pte_t set = 0, clr = 0; if (prot & KVM_PTE_LEAF_ATTR_HI_SW) @@ -1267,7 +1338,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED); - if (!ret) + if (!ret || ret == -EAGAIN) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level); return ret; } @@ -1302,7 +1373,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) } kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, - u64 phys, u32 level, + u64 phys, s8 level, enum kvm_pgtable_prot prot, void *mc, bool force_pte) { @@ -1360,7 +1431,7 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, * fully populated tree up to the PTE entries. Note that @level is * interpreted as in "level @level entry". */ -static int stage2_block_get_nr_page_tables(u32 level) +static int stage2_block_get_nr_page_tables(s8 level) { switch (level) { case 1: @@ -1371,7 +1442,7 @@ static int stage2_block_get_nr_page_tables(u32 level) return 0; default: WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL || - level >= KVM_PGTABLE_MAX_LEVELS); + level > KVM_PGTABLE_LAST_LEVEL); return -EINVAL; }; } @@ -1384,13 +1455,13 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu; kvm_pte_t pte = ctx->old, new, *childp; enum kvm_pgtable_prot prot; - u32 level = ctx->level; + s8 level = ctx->level; bool force_pte; int nr_pages; u64 phys; /* No huge-pages exist at the last level */ - if (level == KVM_PGTABLE_MAX_LEVELS - 1) + if (level == KVM_PGTABLE_LAST_LEVEL) return 0; /* We only split valid block mappings */ @@ -1464,10 +1535,10 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; - u64 vtcr = mmu->arch->vtcr; + u64 vtcr = mmu->vtcr; u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); - u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz); @@ -1490,7 +1561,7 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) { u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); - u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; } @@ -1526,7 +1597,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->pgd = NULL; } -void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) +void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; struct kvm_pgtable_walker walker = { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 6537f58b1a8c..1581df6aec87 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ___activate_traps(vcpu); + if (has_cntpoff()) { + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * We're entrering the guest. Reload the correct + * values from memory now that TGE is clear. + */ + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); + + if (map.direct_ptimer) { + write_sysreg_el0(val, SYS_CNTP_CVAL); + isb(); + } + } + val = read_sysreg(cpacr_el1); val |= CPACR_ELx_TTA; val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN | @@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + if (has_cntpoff()) { + struct timer_map map; + u64 val, offset; + + get_timer_map(vcpu, &map); + + /* + * We're exiting the guest. Save the latest CVAL value + * to memory and apply the offset now that TGE is set. + */ + val = read_sysreg_el0(SYS_CNTP_CVAL); + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + + offset = read_sysreg_s(SYS_CNTPOFF_EL2); + + if (map.direct_ptimer && offset) { + write_sysreg_el0(val + offset, SYS_CNTP_CVAL); + isb(); + } + } + /* * ARM errata 1165522 and 1530923 require the actual execution of the * above before we can switch to the EL2/EL0 translation regime used by @@ -93,12 +137,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) NOKPROBE_SYMBOL(__deactivate_traps); /* - * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to + * Disable IRQs in __vcpu_{load,put}_{activate,deactivate}_traps() to * prevent a race condition between context switching of PMUSERENR_EL0 * in __{activate,deactivate}_traps_common() and IPIs that attempts to * update PMUSERENR_EL0. See also kvm_set_pmuserenr(). */ -void activate_traps_vhe_load(struct kvm_vcpu *vcpu) +static void __vcpu_load_activate_traps(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -107,7 +151,7 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) local_irq_restore(flags); } -void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) +static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -116,6 +160,19 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) local_irq_restore(flags); } +void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu) +{ + __vcpu_load_switch_sysregs(vcpu); + __vcpu_load_activate_traps(vcpu); + __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); +} + +void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu) +{ + __vcpu_put_deactivate_traps(vcpu); + __vcpu_put_switch_sysregs(vcpu); +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -126,6 +183,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, + [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) @@ -170,17 +228,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_save_host_state_vhe(host_ctxt); /* - * ARM erratum 1165522 requires us to configure both stage 1 and - * stage 2 translation for the guest context before we clear - * HCR_EL2.TGE. - * - * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs_vhe above. We must now call - * __load_stage2 before __activate_traps, because - * __load_stage2 configures stage 2 translation, and - * __activate_traps clear HCR_EL2.TGE (among other things). + * Note that ARM erratum 1165522 requires us to configure both stage 1 + * and stage 2 translation for the guest context before we clear + * HCR_EL2.TGE. The stage 1 and stage 2 guest context has already been + * loaded on the CPU in kvm_vcpu_load_vhe(). */ - __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index b35a178e7e0d..8e1e0d5033b6 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -52,7 +52,7 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); /** - * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU + * __vcpu_load_switch_sysregs - Load guest system registers to the physical CPU * * @vcpu: The VCPU pointer * @@ -62,7 +62,7 @@ NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); * and loading system register state early avoids having to load them on * every entry to the VM. */ -void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) +void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; @@ -92,12 +92,10 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) __sysreg_restore_el1_state(guest_ctxt); vcpu_set_flag(vcpu, SYSREGS_ON_CPU); - - activate_traps_vhe_load(vcpu); } /** - * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU + * __vcpu_put_switch_syregs - Restore host system registers to the physical CPU * * @vcpu: The VCPU pointer * @@ -107,13 +105,12 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) * and deferring saving system register state until we're no longer running the * VCPU avoids having to save them on every exit from the VM. */ -void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) +void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(vcpu); __sysreg_save_el1_state(guest_ctxt); __sysreg_save_user_state(guest_ctxt); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index e69da550cdc5..b32e2940df7d 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -11,18 +11,25 @@ #include <asm/tlbflush.h> struct tlb_inv_context { - unsigned long flags; - u64 tcr; - u64 sctlr; + struct kvm_s2_mmu *mmu; + unsigned long flags; + u64 tcr; + u64 sctlr; }; static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, struct tlb_inv_context *cxt) { + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); u64 val; local_irq_save(cxt->flags); + if (vcpu && mmu != vcpu->arch.hw_mmu) + cxt->mmu = vcpu->arch.hw_mmu; + else + cxt->mmu = NULL; + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* * For CPUs that are affected by ARM errata 1165522 or 1530923, @@ -66,10 +73,13 @@ static void __tlb_switch_to_host(struct tlb_inv_context *cxt) * We're done with the TLB operation, let's restore the host's * view of HCR_EL2. */ - write_sysreg(0, vttbr_el2); write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); isb(); + /* ... and the stage-2 MMU context that we switched away from */ + if (cxt->mmu) + __load_stage2(cxt->mmu, cxt->mmu->arch); + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Restore the registers to what they were */ write_sysreg_el1(cxt->tcr, SYS_TCR); @@ -143,6 +153,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, __tlb_switch_to_host(&cxt); } +void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages) +{ + struct tlb_inv_context cxt; + unsigned long stride; + + /* + * Since the range of addresses may not be mapped at + * the same level, assume the worst case as PAGE_SIZE + */ + stride = PAGE_SIZE; + start = round_down(start, stride); + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; @@ -178,18 +216,5 @@ void __kvm_flush_vm_context(void) { dsb(ishst); __tlbi(alle1is); - - /* - * VIPT and PIPT caches are not affected by VMID, so no maintenance - * is necessary across a VMID rollover. - * - * VPIPT caches constrain lookup and maintenance to the active VMID, - * so we need to invalidate lines with a stale VMID to avoid an ABA - * race after multiple rollovers. - * - */ - if (icache_is_vpipt()) - asm volatile("ic ialluis"); - dsb(ish); } |