about | summary | refs | log | tree | commit | diff
path: root/arch/arm64/kvm/hyp
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/fault.h | 2
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/switch.h | 173
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/ffa.h | 2
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 23
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 2
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/mm.h | 1
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/ffa.c | 13
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 7
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 19
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mm.c | 87
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 3
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 10
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/setup.c | 29
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/switch.c | 4
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/tlb.c | 77
-rw-r--r-- arch/arm64/kvm/hyp/pgtable.c | 161
-rw-r--r-- arch/arm64/kvm/hyp/vhe/switch.c | 78
-rw-r--r-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 11
-rw-r--r-- arch/arm64/kvm/hyp/vhe/tlb.c | 59
21 files changed, 533 insertions, 245 deletions
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 9ddcfe2c3e57..9e13c1bc2ad5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
+ esr_fsc_is_permission_fault(esr))) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 34f222af6165..a038320cdb08 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -30,6 +30,7 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
+#include <asm/traps.h>
struct kvm_exception_table_entry {
int insn, fixup;
@@ -70,20 +71,73 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
}
}
-static inline bool __hfgxtr_traps_required(void)
-{
- if (cpus_have_final_cap(ARM64_SME))
- return true;
+#define compute_clr_set(vcpu, reg, clr, set) \
+ do { \
+ u64 hfg; \
+ hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \
+ set |= hfg & __ ## reg ## _MASK; \
+ clr |= ~hfg & __ ## reg ## _nMASK; \
+ } while(0)
- if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
- return true;
+#define update_fgt_traps_cs(vcpu, reg, clr, set) \
+ do { \
+ struct kvm_cpu_context *hctxt = \
+ &this_cpu_ptr(&kvm_host_data)->host_ctxt; \
+ u64 c = 0, s = 0; \
+ \
+ ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
+ compute_clr_set(vcpu, reg, c, s); \
+ s |= set; \
+ c |= clr; \
+ if (c || s) { \
+ u64 val = __ ## reg ## _nMASK; \
+ val |= s; \
+ val &= ~c; \
+ write_sysreg_s(val, SYS_ ## reg); \
+ } \
+ } while(0)
+
+#define update_fgt_traps(vcpu, reg) \
+ update_fgt_traps_cs(vcpu, reg, 0, 0)
- return false;
+/*
+ * Validate the fine grain trap masks.
+ * Check that the masks do not overlap and that all bits are accounted for.
+ */
+#define CHECK_FGT_MASKS(reg) \
+ do { \
+ BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \
+ BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \
+ (__ ## reg ## _nMASK))); \
+ } while(0)
+
+static inline bool cpu_has_amu(void)
+{
+ u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
+
+ return cpuid_feature_extract_unsigned_field(pfr0,
+ ID_AA64PFR0_EL1_AMU_SHIFT);
}
-static inline void __activate_traps_hfgxtr(void)
+static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
+ struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+ u64 r_val, w_val;
+
+ CHECK_FGT_MASKS(HFGRTR_EL2);
+ CHECK_FGT_MASKS(HFGWTR_EL2);
+ CHECK_FGT_MASKS(HFGITR_EL2);
+ CHECK_FGT_MASKS(HDFGRTR_EL2);
+ CHECK_FGT_MASKS(HDFGWTR_EL2);
+ CHECK_FGT_MASKS(HAFGRTR_EL2);
+ CHECK_FGT_MASKS(HCRX_EL2);
+
+ if (!cpus_have_final_cap(ARM64_HAS_FGT))
+ return;
+
+ ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
+ ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
if (cpus_have_final_cap(ARM64_SME)) {
tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
@@ -98,26 +152,56 @@ static inline void __activate_traps_hfgxtr(void)
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
w_set |= HFGxTR_EL2_TCR_EL1_MASK;
- sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
- sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
+ compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
+ }
+
+ /* The default is to trap everything not handled or supported in KVM. */
+ tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 |
+ HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1;
+
+ r_val = __HFGRTR_EL2_nMASK & ~tmp;
+ r_val |= r_set;
+ r_val &= ~r_clr;
+
+ w_val = __HFGWTR_EL2_nMASK & ~tmp;
+ w_val |= w_set;
+ w_val &= ~w_clr;
+
+ write_sysreg_s(r_val, SYS_HFGRTR_EL2);
+ write_sysreg_s(w_val, SYS_HFGWTR_EL2);
+
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return;
+
+ update_fgt_traps(vcpu, HFGITR_EL2);
+ update_fgt_traps(vcpu, HDFGRTR_EL2);
+ update_fgt_traps(vcpu, HDFGWTR_EL2);
+
+ if (cpu_has_amu())
+ update_fgt_traps(vcpu, HAFGRTR_EL2);
}
-static inline void __deactivate_traps_hfgxtr(void)
+static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+ struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- if (cpus_have_final_cap(ARM64_SME)) {
- tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
+ if (!cpus_have_final_cap(ARM64_HAS_FGT))
+ return;
- r_set |= tmp;
- w_set |= tmp;
- }
+ write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
- if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
- w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return;
- sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
- sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
+
+ if (cpu_has_amu())
+ write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2);
}
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -145,8 +229,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
- if (__hfgxtr_traps_required())
- __activate_traps_hfgxtr();
+ if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+ u64 hcrx = HCRX_GUEST_FLAGS;
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ u64 clr = 0, set = 0;
+
+ compute_clr_set(vcpu, HCRX_EL2, clr, set);
+
+ hcrx |= set;
+ hcrx &= ~clr;
+ }
+
+ write_sysreg_s(hcrx, SYS_HCRX_EL2);
+ }
+
+ __activate_traps_hfgxtr(vcpu);
}
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -162,8 +259,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
- if (__hfgxtr_traps_required())
- __deactivate_traps_hfgxtr();
+ if (cpus_have_final_cap(ARM64_HAS_HCX))
+ write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+
+ __deactivate_traps_hfgxtr(vcpu);
}
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@@ -177,9 +276,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- if (cpus_have_final_cap(ARM64_HAS_HCX))
- write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
}
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@@ -194,9 +290,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 &= ~HCR_VSE;
vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
}
-
- if (cpus_have_final_cap(ARM64_HAS_HCX))
- write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
}
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
@@ -204,6 +297,22 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
+static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+
+ /*
+ * Finish potential single step before executing the prologue
+ * instruction.
+ */
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+ return true;
+}
+
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
@@ -513,7 +622,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
- valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
+ valid = kvm_vcpu_trap_is_translation_fault(vcpu) &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h
index 1becb10ecd80..d9fd5e6c7d3c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h
@@ -12,6 +12,6 @@
#define FFA_MAX_FUNC_NUM 0x7F
int hyp_ffa_init(void *pages);
-bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt);
+bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
#endif /* __KVM_HYP_FFA_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index 37440e1dda93..51f043649146 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -69,6 +69,8 @@
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
+#define PVM_ID_AA64PFR2_ALLOW 0ULL
+
/*
* Allow for protected VMs:
* - Mixed-endian
@@ -101,6 +103,7 @@
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
+ * - Control for cache maintenance permission
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
@@ -108,7 +111,8 @@
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
)
/*
@@ -133,6 +137,8 @@
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
+#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
+
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
@@ -178,10 +184,18 @@
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
+/* Restrict pointer authentication to the basic version. */
+#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
+ )
+
+#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
+ )
+
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
@@ -196,8 +210,9 @@
)
#define PVM_ID_AA64ISAR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
)
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index fe5472a184a3..97c527ef53c2 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -16,7 +16,7 @@ struct hyp_pool {
* API at EL2.
*/
hyp_spinlock_t lock;
- struct list_head free_area[MAX_ORDER + 1];
+ struct list_head free_area[NR_PAGE_ORDERS];
phys_addr_t range_start;
phys_addr_t range_end;
unsigned short max_order;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index d5ec972b5c1e..230e4f2527de 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index ab4f5d160c58..320f2eaa14a9 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -423,6 +423,7 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
DECLARE_REG(u32, fraglen, ctxt, 2);
DECLARE_REG(u64, addr_mbz, ctxt, 3);
DECLARE_REG(u32, npages_mbz, ctxt, 4);
+ struct ffa_mem_region_attributes *ep_mem_access;
struct ffa_composite_mem_region *reg;
struct ffa_mem_region *buf;
u32 offset, nr_ranges;
@@ -452,7 +453,9 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
- offset = buf->ep_mem_access[0].composite_off;
+ ep_mem_access = (void *)buf +
+ ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0);
+ offset = ep_mem_access->composite_off;
if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
@@ -504,6 +507,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
DECLARE_REG(u32, handle_lo, ctxt, 1);
DECLARE_REG(u32, handle_hi, ctxt, 2);
DECLARE_REG(u32, flags, ctxt, 3);
+ struct ffa_mem_region_attributes *ep_mem_access;
struct ffa_composite_mem_region *reg;
u32 offset, len, fraglen, fragoff;
struct ffa_mem_region *buf;
@@ -528,7 +532,9 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
len = res->a1;
fraglen = res->a2;
- offset = buf->ep_mem_access[0].composite_off;
+ ep_mem_access = (void *)buf +
+ ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0);
+ offset = ep_mem_access->composite_off;
/*
* We can trust the SPMD to get this right, but let's at least
* check that we end up with something that doesn't look _completely_
@@ -634,9 +640,8 @@ out_handled:
return true;
}
-bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt)
+bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
- DECLARE_REG(u64, func_id, host_ctxt, 0);
struct arm_smccc_res res;
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 90fade1b032e..2994878d68ea 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -57,6 +57,7 @@ __do_hyp_init:
cmp x0, #HVC_STUB_HCALL_NR
b.lo __kvm_handle_stub_hvc
+ bic x0, x0, #ARM_SMCCC_CALL_HINTS
mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init)
cmp x0, x3
b.eq 1f
@@ -121,11 +122,7 @@ alternative_if ARM64_HAS_CNP
alternative_else_nop_endif
msr ttbr0_el2, x2
- /*
- * Set the PS bits in TCR_EL2.
- */
ldr x0, [x0, #NVHE_INIT_TCR_EL2]
- tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
msr tcr_el2, x0
isb
@@ -291,6 +288,8 @@ alternative_else_nop_endif
mov sp, x0
/* And turn the MMU back on! */
+ dsb nsh
+ isb
set_sctlr_el2 x2
ret x1
SYM_FUNC_END(__pkvm_init_switch_pgd)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a169c619db60..2385fd03ed87 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+ DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+ DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+ __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
+ HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_read_vmcr),
@@ -357,6 +368,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
if (static_branch_unlikely(&kvm_protected_mode_initialized))
hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
+ id &= ~ARM_SMCCC_CALL_HINTS;
id -= KVM_HOST_SMCCC_ID(0);
if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
@@ -381,11 +393,14 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
{
+ DECLARE_REG(u64, func_id, host_ctxt, 0);
bool handled;
- handled = kvm_host_psci_handler(host_ctxt);
+ func_id &= ~ARM_SMCCC_CALL_HINTS;
+
+ handled = kvm_host_psci_handler(host_ctxt, func_id);
if (!handled)
- handled = kvm_host_ffa_handler(host_ctxt);
+ handled = kvm_host_ffa_handler(host_ctxt, func_id);
if (!handled)
default_host_smc_handler(host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 9d703441278b..861c76021a25 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -91,7 +91,7 @@ static void host_s2_put_page(void *addr)
hyp_put_page(&host_s2_pool, addr);
}
-static void host_s2_free_unlinked_table(void *addr, u32 level)
+static void host_s2_free_unlinked_table(void *addr, s8 level)
{
kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}
@@ -129,8 +129,8 @@ static void prepare_host_vtcr(void)
parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
- host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
- id_aa64mmfr1_el1_sys_val, phys_shift);
+ host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
+ id_aa64mmfr1_el1_sys_val, phys_shift);
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
@@ -235,7 +235,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
unsigned long nr_pages;
int ret;
- nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
+ nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
if (ret)
return ret;
@@ -295,7 +295,7 @@ int __pkvm_prot_finalize(void)
return -EPERM;
params->vttbr = kvm_get_vttbr(mmu);
- params->vtcr = host_mmu.arch.vtcr;
+ params->vtcr = mmu->vtcr;
params->hcr_el2 |= HCR_VM;
/*
@@ -443,7 +443,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
struct kvm_mem_range cur;
kvm_pte_t pte;
- u32 level;
+ s8 level;
int ret;
hyp_assert_lock_held(&host_mmu.lock);
@@ -462,7 +462,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
cur.start = ALIGN_DOWN(addr, granule);
cur.end = cur.start + granule;
level++;
- } while ((level < KVM_PGTABLE_MAX_LEVELS) &&
+ } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
!(kvm_level_supports_block_mapping(level) &&
range_included(&cur, range)));
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 318298eb3d6b..b01a3d1078a8 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+ unsigned long cur;
+
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+
+ if (!start || start < __io_map_base)
+ return -EINVAL;
+
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ cur = start + PAGE_ALIGN(size);
+
+ /* Are we overflowing on the vmemmap ? */
+ if (cur > __hyp_vmemmap)
+ return -ENOMEM;
+
+ __io_map_base = cur;
+
+ return 0;
+}
+
/**
* pkvm_alloc_private_va_range - Allocates a private VA range.
* @size: The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
*/
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long base, addr;
- int ret = 0;
+ unsigned long addr;
+ int ret;
hyp_spin_lock(&pkvm_pgd_lock);
-
- /* Align the allocation based on the order of its size */
- addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
- /* The allocated size is always a multiple of PAGE_SIZE */
- base = addr + PAGE_ALIGN(size);
-
- /* Are we overflowing on the vmemmap ? */
- if (!addr || base > __hyp_vmemmap)
- ret = -ENOMEM;
- else {
- __io_map_base = base;
- *haddr = addr;
- }
-
+ addr = __io_map_base;
+ ret = __pkvm_alloc_private_va_range(addr, size);
hyp_spin_unlock(&pkvm_pgd_lock);
+ *haddr = addr;
+
return ret;
}
@@ -250,7 +260,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
*/
dsb(ishst);
- __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
dsb(ish);
isb();
}
@@ -265,7 +275,7 @@ static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
{
struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
- if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1)
+ if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
return -EINVAL;
slot->addr = ctx->addr;
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+ unsigned long addr, prev_base;
+ size_t size;
+ int ret;
+
+ hyp_spin_lock(&pkvm_pgd_lock);
+
+ prev_base = __io_map_base;
+ /*
+ * Efficient stack verification using the PAGE_SHIFT bit implies
+ * an alignment of our allocation on the order of the size.
+ */
+ size = PAGE_SIZE * 2;
+ addr = ALIGN(__io_map_base, size);
+
+ ret = __pkvm_alloc_private_va_range(addr, size);
+ if (!ret) {
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+ PAGE_SIZE, phys, PAGE_HYP);
+ if (ret)
+ __io_map_base = prev_base;
+ }
+ hyp_spin_unlock(&pkvm_pgd_lock);
+
+ *haddr = addr + size;
+
+ return ret;
+}
+
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index b1e392186a0f..e691290d3765 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -228,7 +228,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
int i;
hyp_spin_lock_init(&pool->lock);
- pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
+ pool->max_order = min(MAX_PAGE_ORDER,
+ get_order(nr_pages << PAGE_SHIFT));
for (i = 0; i <= pool->max_order; i++)
INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 8033ef353a5d..26dd9a20ad6e 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -12,7 +12,7 @@
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>
-/* Used by icache_is_vpipt(). */
+/* Used by icache_is_aliasing(). */
unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
@@ -136,6 +136,10 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
cptr_set |= CPTR_EL2_TTA;
}
+ /* Trap External Trace */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
+ mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
+
vcpu->arch.mdcr_el2 |= mdcr_set;
vcpu->arch.mdcr_el2 &= ~mdcr_clear;
vcpu->arch.cptr_el2 |= cptr_set;
@@ -303,7 +307,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
{
hyp_vm->host_kvm = host_kvm;
hyp_vm->kvm.created_vcpus = nr_vcpus;
- hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
+ hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -483,7 +487,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
}
vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
- pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
+ pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
ret = -ENOMEM;
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 24543d2a3490..d57bcb6ab94d 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -273,9 +273,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_
}
}
-bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
- DECLARE_REG(u64, func_id, host_ctxt, 0);
unsigned long ret;
switch (kvm_host_psci_config.version) {
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bb98630dfeaf..bc58d1b515af 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
for (i = 0; i < hyp_nr_cpus; i++) {
struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
- unsigned long hyp_addr;
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
@@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
- /*
- * Allocate a contiguous HYP private VA range for the stack
- * and guard page. The allocation is also aligned based on
- * the order of its size.
- */
- ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
if (ret)
return ret;
-
- /*
- * Since the stack grows downwards, map the stack to the page
- * at the higher address and leave the lower guard page
- * unbacked.
- *
- * Any valid stack address now has the PAGE_SHIFT bit as 1
- * and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
- */
- hyp_spin_lock(&pkvm_pgd_lock);
- ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
- PAGE_SIZE, params->stack_pa, PAGE_HYP);
- hyp_spin_unlock(&pkvm_pgd_lock);
- if (ret)
- return ret;
-
- /* Update stack_hyp_va to end of the stack's private VA range */
- params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*
@@ -206,7 +181,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (!kvm_pte_valid(ctx->old))
return 0;
- if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
+ if (ctx->level != KVM_PGTABLE_LAST_LEVEL)
return -EINVAL;
phys = kvm_pte_to_phys(ctx->old);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index e89a23153e85..c50f8459e4fc 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -192,6 +192,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn pvm_exit_handlers[] = {
@@ -203,6 +204,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
@@ -236,7 +238,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
* KVM_ARM_VCPU_INIT, however, this is likely not possible for
* protected VMs.
*/
- vcpu->arch.target = -1;
+ vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index b9991bbd8e3f..a60fb13e2192 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -105,28 +105,6 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (icache_is_vpipt())
- icache_inval_all_pou();
-
__tlb_switch_to_host(&cxt);
}
@@ -157,27 +135,31 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages)
+{
+ struct tlb_inv_context cxt;
+ unsigned long stride;
+
/*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
+ * Since the range of addresses may not all be mapped at
+ * the same level, assume the worst case: a PAGE_SIZE stride.
*/
- if (icache_is_vpipt())
- icache_inval_all_pou();
+ stride = PAGE_SIZE;
+ start = round_down(start, stride);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt, false);
+
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
__tlb_switch_to_host(&cxt);
}
@@ -216,18 +198,5 @@ void __kvm_flush_vm_context(void)
/* Same remark as in __tlb_switch_to_guest() */
dsb(ish);
__tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
dsb(ish);
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f7a93ef29250..c651df904fe3 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -79,7 +79,10 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
static bool kvm_phys_is_valid(u64 phys)
{
- return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
+ u64 parange_max = kvm_get_parange_max();
+ u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
+
+ return phys < BIT(shift);
}
static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
@@ -98,7 +101,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
return IS_ALIGNED(ctx->addr, granule);
}
-static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
+static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level)
{
u64 shift = kvm_granule_shift(level);
u64 mask = BIT(PAGE_SHIFT - 3) - 1;
@@ -114,7 +117,7 @@ static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
return (addr & mask) >> shift;
}
-static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
+static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level)
{
struct kvm_pgtable pgt = {
.ia_bits = ia_bits,
@@ -124,9 +127,9 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}
-static bool kvm_pte_table(kvm_pte_t pte, u32 level)
+static bool kvm_pte_table(kvm_pte_t pte, s8 level)
{
- if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+ if (level == KVM_PGTABLE_LAST_LEVEL)
return false;
if (!kvm_pte_valid(pte))
@@ -154,11 +157,11 @@ static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops
return pte;
}
-static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
+static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level)
{
kvm_pte_t pte = kvm_phys_to_pte(pa);
- u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
- KVM_PTE_TYPE_BLOCK;
+ u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE :
+ KVM_PTE_TYPE_BLOCK;
pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
@@ -203,11 +206,11 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
}
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
- struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
+ struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level);
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
struct kvm_pgtable_mm_ops *mm_ops,
- kvm_pteref_t pteref, u32 level)
+ kvm_pteref_t pteref, s8 level)
{
enum kvm_pgtable_walk_flags flags = data->walker->flags;
kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
@@ -272,12 +275,13 @@ out:
}
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
- struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level)
+ struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level)
{
u32 idx;
int ret = 0;
- if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
+ if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL ||
+ level > KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
@@ -340,7 +344,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct leaf_walk_data {
kvm_pte_t pte;
- u32 level;
+ s8 level;
};
static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -355,7 +359,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
}
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
- kvm_pte_t *ptep, u32 *level)
+ kvm_pte_t *ptep, s8 *level)
{
struct leaf_walk_data data;
struct kvm_pgtable_walker walker = {
@@ -401,14 +405,15 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
if (device)
return -EINVAL;
- if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
+ if (system_supports_bti_kernel())
attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP;
} else {
attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
}
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
- attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
+ if (!kvm_lpa2_is_enabled())
+ attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
@@ -467,7 +472,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (hyp_map_walker_try_leaf(ctx, data))
return 0;
- if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
+ if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
@@ -563,14 +568,19 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
- u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
+ s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 -
+ ARM64_HW_PGTABLE_LEVELS(va_bits);
+
+ if (start_level < KVM_PGTABLE_FIRST_LEVEL ||
+ start_level > KVM_PGTABLE_LAST_LEVEL)
+ return -EINVAL;
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
if (!pgt->pgd)
return -ENOMEM;
pgt->ia_bits = va_bits;
- pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
+ pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
pgt->mmu = NULL;
pgt->force_pte_cb = NULL;
@@ -624,7 +634,7 @@ struct stage2_map_data {
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
{
u64 vtcr = VTCR_EL2_FLAGS;
- u8 lvls;
+ s8 lvls;
vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
vtcr |= VTCR_EL2_T0SZ(phys_shift);
@@ -635,6 +645,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
lvls = stage2_pgtable_levels(phys_shift);
if (lvls < 2)
lvls = 2;
+
+ /*
+ * When LPA2 is enabled, the HW supports an extra level of translation
+ * (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2
+ * as an addition to SL0 to enable encoding this extra start level.
+ * However, since we always use concatenated pages for the first level
+ * lookup, we will never need this extra level and therefore do not need
+ * to touch SL2.
+ */
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
#ifdef CONFIG_ARM64_HW_AFDBM
@@ -654,6 +673,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
vtcr |= VTCR_EL2_HA;
#endif /* CONFIG_ARM64_HW_AFDBM */
+ if (kvm_lpa2_is_enabled())
+ vtcr |= VTCR_EL2_DS;
+
/* Set the vmid bits */
vtcr |= (get_vmid_bits(mmfr1) == 16) ?
VTCR_EL2_VS_16BIT :
@@ -664,12 +686,32 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
static bool stage2_has_fwb(struct kvm_pgtable *pgt)
{
- if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
return false;
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t addr, size_t size)
+{
+ unsigned long pages, inval_pages;
+
+ if (!system_supports_tlb_range()) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ return;
+ }
+
+ pages = size >> PAGE_SHIFT;
+ while (pages > 0) {
+ inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+ addr += inval_pages << PAGE_SHIFT;
+ pages -= inval_pages;
+ }
+}
+
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -691,7 +733,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
if (prot & KVM_PGTABLE_PROT_W)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
- attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
+ if (!kvm_lpa2_is_enabled())
+ attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
+
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
@@ -786,7 +830,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* evicted pte value (if any).
*/
if (kvm_pte_table(ctx->old, ctx->level))
- kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+ kvm_granule_size(ctx->level));
else if (kvm_pte_valid(ctx->old))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
@@ -810,16 +855,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
smp_store_release(ctx->ptep, new);
}
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
- struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+ /*
+ * If FEAT_TLBIRANGE is implemented, defer the individual
+ * TLB invalidations until the entire walk is finished, and
+ * then use the range-based TLBI instructions to do the
+ * invalidations. Condition deferred TLB invalidation on the
+ * system supporting FWB as the optimization is entirely
+ * pointless when the unmap walker needs to perform CMOs.
+ */
+ return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+ struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
{
+ struct kvm_pgtable *pgt = ctx->arg;
+
/*
- * Clear the existing PTE, and perform break-before-make with
- * TLB maintenance if it was valid.
+ * Clear the existing PTE, and perform break-before-make if it was
+ * valid. Depending on the system support, defer the TLB maintenance
+ * for the same until the entire unmap walk is completed.
*/
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+ if (!stage2_unmap_defer_tlb_flush(pgt))
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+ ctx->addr, ctx->level);
}
mm_ops->put_page(ctx->ptep);
@@ -861,7 +926,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
{
u64 phys = stage2_map_walker_phys_addr(ctx, data);
- if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
+ if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
return false;
return kvm_block_mapping_supported(ctx, phys);
@@ -940,7 +1005,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (ret != -E2BIG)
return ret;
- if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
+ if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
if (!data->memcache)
@@ -1077,7 +1142,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
* block entry and rely on the remaining portions being faulted
* back lazily.
*/
- stage2_put_pte(ctx, mmu, mm_ops);
+ stage2_unmap_put_pte(ctx, mmu, mm_ops);
if (need_flush && mm_ops->dcache_clean_inval_poc)
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,20 +1156,26 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
+ int ret;
struct kvm_pgtable_walker walker = {
.cb = stage2_unmap_walker,
.arg = pgt,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
- return kvm_pgtable_walk(pgt, addr, size, &walker);
+ ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+ if (stage2_unmap_defer_tlb_flush(pgt))
+ /* Perform the deferred TLB invalidations */
+ kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+ return ret;
}
struct stage2_attr_data {
kvm_pte_t attr_set;
kvm_pte_t attr_clr;
kvm_pte_t pte;
- u32 level;
+ s8 level;
};
static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -1147,7 +1218,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
u64 size, kvm_pte_t attr_set,
kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
- u32 *level, enum kvm_pgtable_walk_flags flags)
+ s8 *level, enum kvm_pgtable_walk_flags flags)
{
int ret;
kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
@@ -1249,7 +1320,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot)
{
int ret;
- u32 level;
+ s8 level;
kvm_pte_t set = 0, clr = 0;
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
@@ -1267,7 +1338,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
- if (!ret)
+ if (!ret || ret == -EAGAIN)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
return ret;
}
@@ -1302,7 +1373,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
- u64 phys, u32 level,
+ u64 phys, s8 level,
enum kvm_pgtable_prot prot,
void *mc, bool force_pte)
{
@@ -1360,7 +1431,7 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
* fully populated tree up to the PTE entries. Note that @level is
* interpreted as in "level @level entry".
*/
-static int stage2_block_get_nr_page_tables(u32 level)
+static int stage2_block_get_nr_page_tables(s8 level)
{
switch (level) {
case 1:
@@ -1371,7 +1442,7 @@ static int stage2_block_get_nr_page_tables(u32 level)
return 0;
default:
WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
- level >= KVM_PGTABLE_MAX_LEVELS);
+ level > KVM_PGTABLE_LAST_LEVEL);
return -EINVAL;
};
}
@@ -1384,13 +1455,13 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_s2_mmu *mmu;
kvm_pte_t pte = ctx->old, new, *childp;
enum kvm_pgtable_prot prot;
- u32 level = ctx->level;
+ s8 level = ctx->level;
bool force_pte;
int nr_pages;
u64 phys;
/* No huge-pages exist at the last level */
- if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+ if (level == KVM_PGTABLE_LAST_LEVEL)
return 0;
/* We only split valid block mappings */
@@ -1464,10 +1535,10 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
- u64 vtcr = mmu->arch->vtcr;
+ u64 vtcr = mmu->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
- u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+ s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
@@ -1490,7 +1561,7 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
{
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
- u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+ s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}
@@ -1526,7 +1597,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
pgt->pgd = NULL;
}
-void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
struct kvm_pgtable_walker walker = {
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6537f58b1a8c..1581df6aec87 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
___activate_traps(vcpu);
+ if (has_cntpoff()) {
+ struct timer_map map;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're entering the guest. Reload the correct
+ * values from memory now that TGE is clear.
+ */
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
+
+ if (map.direct_ptimer) {
+ write_sysreg_el0(val, SYS_CNTP_CVAL);
+ isb();
+ }
+ }
+
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ if (has_cntpoff()) {
+ struct timer_map map;
+ u64 val, offset;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're exiting the guest. Save the latest CVAL value
+ * to memory and apply the offset now that TGE is set.
+ */
+ val = read_sysreg_el0(SYS_CNTP_CVAL);
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+
+ offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+
+ if (map.direct_ptimer && offset) {
+ write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
+ isb();
+ }
+ }
+
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by
@@ -93,12 +137,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
NOKPROBE_SYMBOL(__deactivate_traps);
/*
- * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to
+ * Disable IRQs in __vcpu_{load,put}_{activate,deactivate}_traps() to
* prevent a race condition between context switching of PMUSERENR_EL0
* in __{activate,deactivate}_traps_common() and IPIs that attempts to
* update PMUSERENR_EL0. See also kvm_set_pmuserenr().
*/
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+static void __vcpu_load_activate_traps(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -107,7 +151,7 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
local_irq_restore(flags);
}
-void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
+static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -116,6 +160,19 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
local_irq_restore(flags);
}
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
+{
+ __vcpu_load_switch_sysregs(vcpu);
+ __vcpu_load_activate_traps(vcpu);
+ __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
+}
+
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
+{
+ __vcpu_put_deactivate_traps(vcpu);
+ __vcpu_put_switch_sysregs(vcpu);
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -126,6 +183,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
@@ -170,17 +228,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_save_host_state_vhe(host_ctxt);
/*
- * ARM erratum 1165522 requires us to configure both stage 1 and
- * stage 2 translation for the guest context before we clear
- * HCR_EL2.TGE.
- *
- * We have already configured the guest's stage 1 translation in
- * kvm_vcpu_load_sysregs_vhe above. We must now call
- * __load_stage2 before __activate_traps, because
- * __load_stage2 configures stage 2 translation, and
- * __activate_traps clear HCR_EL2.TGE (among other things).
+ * Note that ARM erratum 1165522 requires us to configure both stage 1
+ * and stage 2 translation for the guest context before we clear
+ * HCR_EL2.TGE. The stage 1 and stage 2 guest context has already been
+ * loaded on the CPU in kvm_vcpu_load_vhe().
*/
- __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
__activate_traps(vcpu);
__kvm_adjust_pc(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index b35a178e7e0d..8e1e0d5033b6 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -52,7 +52,7 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
/**
- * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU
+ * __vcpu_load_switch_sysregs - Load guest system registers to the physical CPU
*
* @vcpu: The VCPU pointer
*
@@ -62,7 +62,7 @@ NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
* and loading system register state early avoids having to load them on
* every entry to the VM.
*/
-void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
@@ -92,12 +92,10 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_restore_el1_state(guest_ctxt);
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
-
- activate_traps_vhe_load(vcpu);
}
/**
- * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU
+ * __vcpu_put_switch_sysregs - Restore host system registers to the physical CPU
*
* @vcpu: The VCPU pointer
*
@@ -107,13 +105,12 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
* and deferring saving system register state until we're no longer running the
* VCPU avoids having to save them on every exit from the VM.
*/
-void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- deactivate_traps_vhe_put(vcpu);
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index e69da550cdc5..b32e2940df7d 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -11,18 +11,25 @@
#include <asm/tlbflush.h>
struct tlb_inv_context {
- unsigned long flags;
- u64 tcr;
- u64 sctlr;
+ struct kvm_s2_mmu *mmu;
+ unsigned long flags;
+ u64 tcr;
+ u64 sctlr;
};
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
struct tlb_inv_context *cxt)
{
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
local_irq_save(cxt->flags);
+ if (vcpu && mmu != vcpu->arch.hw_mmu)
+ cxt->mmu = vcpu->arch.hw_mmu;
+ else
+ cxt->mmu = NULL;
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/*
* For CPUs that are affected by ARM errata 1165522 or 1530923,
@@ -66,10 +73,13 @@ static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
* We're done with the TLB operation, let's restore the host's
* view of HCR_EL2.
*/
- write_sysreg(0, vttbr_el2);
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
isb();
+ /* ... and the stage-2 MMU context that we switched away from */
+ if (cxt->mmu)
+ __load_stage2(cxt->mmu, cxt->mmu->arch);
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Restore the registers to what they were */
write_sysreg_el1(cxt->tcr, SYS_TCR);
@@ -143,6 +153,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages)
+{
+ struct tlb_inv_context cxt;
+ unsigned long stride;
+
+ /*
+ * Since the range of addresses may not be mapped at
+ * the same level, assume the worst case as PAGE_SIZE
+ */
+ stride = PAGE_SIZE;
+ start = round_down(start, stride);
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
@@ -178,18 +216,5 @@ void __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
dsb(ish);
}