 arch/arm64/include/asm/cpufeature.h | 23
 arch/arm64/include/asm/kvm_host.h   |  3
 arch/arm64/include/asm/sysreg.h     | 38
 arch/arm64/kernel/vmlinux.lds.S     |  4
 arch/arm64/kvm/arm.c                | 35
 arch/arm64/kvm/guest.c              |  4
 arch/arm64/kvm/mmu.c                | 46
 arch/arm64/kvm/perf.c               |  2
 arch/arm64/kvm/pmu-emul.c           | 14
 arch/arm64/kvm/reset.c              | 27
 arch/arm64/kvm/sys_regs.c           | 70
 arch/arm64/kvm/vgic/vgic-mmio-v2.c  |  4
 arch/arm64/kvm/vgic/vgic-v2.c       | 36
 arch/arm64/kvm/vgic/vgic-v3.c       | 36
 arch/arm64/kvm/vgic/vgic.c          | 39
 arch/arm64/kvm/vgic/vgic.h          |  2
 include/linux/kvm_host.h            |  1
 include/linux/page-flags.h          | 37
 virt/kvm/kvm_main.c                 | 19
 19 files changed, 228 insertions(+), 212 deletions(-)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9bb9d11750d7..c4dabba9cfdf 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -657,7 +657,8 @@ static inline bool system_supports_4kb_granule(void)
val = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_TGRAN4_SHIFT);
- return val == ID_AA64MMFR0_TGRAN4_SUPPORTED;
+ return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX);
}
static inline bool system_supports_64kb_granule(void)
@@ -669,7 +670,8 @@ static inline bool system_supports_64kb_granule(void)
val = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_TGRAN64_SHIFT);
- return val == ID_AA64MMFR0_TGRAN64_SUPPORTED;
+ return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX);
}
static inline bool system_supports_16kb_granule(void)
@@ -681,7 +683,8 @@ static inline bool system_supports_16kb_granule(void)
val = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_TGRAN16_SHIFT);
- return val == ID_AA64MMFR0_TGRAN16_SUPPORTED;
+ return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX);
}
static inline bool system_supports_mixed_endian_el0(void)
@@ -781,13 +784,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
{
switch (parange) {
- case 0: return 32;
- case 1: return 36;
- case 2: return 40;
- case 3: return 42;
- case 4: return 44;
- case 5: return 48;
- case 6: return 52;
+ case ID_AA64MMFR0_PARANGE_32: return 32;
+ case ID_AA64MMFR0_PARANGE_36: return 36;
+ case ID_AA64MMFR0_PARANGE_40: return 40;
+ case ID_AA64MMFR0_PARANGE_42: return 42;
+ case ID_AA64MMFR0_PARANGE_44: return 44;
+ case ID_AA64MMFR0_PARANGE_48: return 48;
+ case ID_AA64MMFR0_PARANGE_52: return 52;
/*
* A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme
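The TGRAN fields are now treated as a range check rather than a comparison against a single "supported" value, so newer encodings (such as 4K granule with 52-bit PA support) pass. A minimal user-space sketch of the new 4K check (the field shift and the extraction helper are stand-ins; the MIN/MAX constants mirror the hunk above):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ID_AA64MMFR0_TGRAN4_SHIFT          28   /* assumed field position */
    #define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN  0x0
    #define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX  0x7

    /* Stand-in for cpuid_feature_extract_unsigned_field(): a 4-bit ID field */
    static unsigned int extract_field(uint64_t reg, unsigned int shift)
    {
            return (reg >> shift) & 0xf;
    }

    static bool supports_4kb_granule(uint64_t mmfr0)
    {
            unsigned int val = extract_field(mmfr0, ID_AA64MMFR0_TGRAN4_SHIFT);

            /* 0x1..0x7 now count as supported, not just 0x0 */
            return val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN &&
                   val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   supports_4kb_granule(0x0ULL << 28),   /* 1: plain support  */
                   supports_4kb_granule(0x1ULL << 28),   /* 1: newly accepted */
                   supports_4kb_granule(0xfULL << 28));  /* 0: TGRAN4_NI      */
            return 0;
    }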
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 41911585ae0c..62ac4174e608 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -66,7 +66,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;
int kvm_arm_init_sve(void);
-int __attribute_const__ kvm_target_cpu(void);
+u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
@@ -185,7 +185,6 @@ enum vcpu_sysreg {
PMCNTENSET_EL0, /* Count Enable Set Register */
PMINTENSET_EL1, /* Interrupt Enable Set Register */
PMOVSSET_EL0, /* Overflow Flag Status Set Register */
- PMSWINC_EL0, /* Software Increment Register */
PMUSERENR_EL0, /* User Enable Register */
/* Pointer Authentication Registers in a strict increasing order. */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7b9c3acba684..2084b22613e2 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -847,15 +847,26 @@
#define ID_AA64MMFR0_ASID_SHIFT 4
#define ID_AA64MMFR0_PARANGE_SHIFT 0
-#define ID_AA64MMFR0_TGRAN4_NI 0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN64_NI 0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN16_NI 0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
+#define ID_AA64MMFR0_TGRAN4_NI 0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_TGRAN64_NI 0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_TGRAN16_NI 0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf
+
+#define ID_AA64MMFR0_PARANGE_32 0x0
+#define ID_AA64MMFR0_PARANGE_36 0x1
+#define ID_AA64MMFR0_PARANGE_40 0x2
+#define ID_AA64MMFR0_PARANGE_42 0x3
+#define ID_AA64MMFR0_PARANGE_44 0x4
#define ID_AA64MMFR0_PARANGE_48 0x5
#define ID_AA64MMFR0_PARANGE_52 0x6
+#define ARM64_MIN_PARANGE_BITS 32
+
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2
@@ -1028,16 +1039,19 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
+#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
+#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT
#elif defined(CONFIG_ARM64_64K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
+#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT
#endif
#define MVFR2_FPMISC_SHIFT 4
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 709d2c433c5e..f6b1a88245db 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -181,6 +181,8 @@ SECTIONS
/* everything from this point to __init_begin will be marked RO NX */
RO_DATA(PAGE_SIZE)
+ HYPERVISOR_DATA_SECTIONS
+
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
idmap_pg_end = .;
@@ -260,8 +262,6 @@ SECTIONS
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
- HYPERVISOR_DATA_SECTIONS
-
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 37f738877aa3..fd9a6bcf797f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -15,6 +15,7 @@
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
+#include <linux/kmemleak.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
@@ -42,10 +43,6 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
-#ifdef REQUIRES_VIRT
-__asm__(".arch_extension virt");
-#endif
-
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
@@ -1039,7 +1036,7 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
unsigned int i, ret;
- int phys_target = kvm_target_cpu();
+ u32 phys_target = kvm_target_cpu();
if (init->target != phys_target)
return -EINVAL;
@@ -1700,11 +1697,6 @@ static bool init_psci_relay(void)
return true;
}
-static int init_common_resources(void)
-{
- return kvm_set_ipa_limit();
-}
-
static int init_subsystems(void)
{
int err = 0;
@@ -1964,6 +1956,13 @@ static int finalize_hyp_mode(void)
return 0;
/*
+ * Exclude HYP BSS from kmemleak so that it doesn't get peeked
+ * at, which would end badly once the section is inaccessible.
+ * None of the other sections should ever be introspected.
+ */
+ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+
+ /*
* Flip the static key upfront as that may no longer be possible
* once the host stage 2 is installed.
*/
@@ -1973,11 +1972,6 @@ static int finalize_hyp_mode(void)
return 0;
}
-static void check_kvm_target_cpu(void *ret)
-{
- *(int *)ret = kvm_target_cpu();
-}
-
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
struct kvm_vcpu *vcpu;
@@ -2037,7 +2031,6 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
int kvm_arch_init(void *opaque)
{
int err;
- int ret, cpu;
bool in_hyp_mode;
if (!is_hyp_mode_available()) {
@@ -2052,15 +2045,7 @@ int kvm_arch_init(void *opaque)
kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
"Only trusted guests should be used on this system.\n");
- for_each_online_cpu(cpu) {
- smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
- if (ret < 0) {
- kvm_err("Error, CPU %d not supported!\n", cpu);
- return -ENODEV;
- }
- }
-
- err = init_common_resources();
+ err = kvm_set_ipa_limit();
if (err)
return err;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 1dfb83578277..8ce850fa7b49 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -842,7 +842,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
return 0;
}
-int __attribute_const__ kvm_target_cpu(void)
+u32 __attribute_const__ kvm_target_cpu(void)
{
unsigned long implementor = read_cpuid_implementor();
unsigned long part_number = read_cpuid_part_number();
@@ -874,7 +874,7 @@ int __attribute_const__ kvm_target_cpu(void)
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
{
- int target = kvm_target_cpu();
+ u32 target = kvm_target_cpu();
if (target < 0)
return -ENODEV;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index cbab146cda6a..3f0aabb3a1e4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -81,6 +81,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+ ++kvm->stat.generic.remote_tlb_flush;
}
static bool kvm_is_device_pfn(unsigned long pfn)
@@ -453,6 +454,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
return 0;
}
+static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
+ /* We shouldn't need any other callback to walk the PT */
+ .phys_to_virt = kvm_host_va,
+};
+
+static int get_user_mapping_size(struct kvm *kvm, u64 addr)
+{
+ struct kvm_pgtable pgt = {
+ .pgd = (kvm_pte_t *)kvm->mm->pgd,
+ .ia_bits = VA_BITS,
+ .start_level = (KVM_PGTABLE_MAX_LEVELS -
+ CONFIG_PGTABLE_LEVELS),
+ .mm_ops = &kvm_user_mm_ops,
+ };
+ kvm_pte_t pte = 0; /* Keep GCC quiet... */
+ u32 level = ~0;
+ int ret;
+
+ ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
+ VM_BUG_ON(ret);
+ VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
+ VM_BUG_ON(!(pte & PTE_VALID));
+
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
+}
+
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
.zalloc_pages_exact = kvm_host_zalloc_pages_exact,
@@ -800,7 +827,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
* Returns the size of the mapping.
*/
static unsigned long
-transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
+transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long hva, kvm_pfn_t *pfnp,
phys_addr_t *ipap)
{
@@ -811,8 +838,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
* sure that the HVA and IPA are sufficiently aligned and that the
* block map is contained within the memslot.
*/
- if (kvm_is_transparent_hugepage(pfn) &&
- fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+ if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+ get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
/*
* The address we faulted on is backed by a transparent huge
* page. However, because we map the compound huge page and
@@ -834,7 +861,7 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
*ipap &= PMD_MASK;
kvm_release_pfn_clean(pfn);
pfn &= ~(PTRS_PER_PMD - 1);
- kvm_get_pfn(pfn);
+ get_page(pfn_to_page(pfn));
*pfnp = pfn;
return PMD_SIZE;
@@ -1070,9 +1097,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
- vma_pagesize = transparent_hugepage_adjust(memslot, hva,
- &pfn, &fault_ipa);
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+ if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+ vma_pagesize = fault_granule;
+ else
+ vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
+ hva, &pfn,
+ &fault_ipa);
+ }
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new VM_SHARED VMA */
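get_user_mapping_size() converts the level of the leaf entry returned by kvm_pgtable_get_leaf() into the size of the userspace mapping, which user_mem_abort() then compares against PMD_SIZE instead of calling kvm_is_transparent_hugepage(). A sketch of that arithmetic, assuming 4KiB pages and the usual definition of ARM64_HW_PGTABLE_LEVEL_SHIFT():

    #include <stdio.h>

    #define PAGE_SHIFT 12   /* 4KiB pages assumed for this sketch */

    /* Assumed to match the arm64 definition in pgtable-hwdef.h */
    #define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)

    int main(void)
    {
            /* A leaf at level 3 maps 4KiB, level 2 maps 2MiB, level 1 maps 1GiB */
            for (int level = 1; level <= 3; level++)
                    printf("level %d -> %lu bytes\n", level,
                           1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
            return 0;
    }

A 2MiB (level 2) result is what allows transparent_hugepage_adjust() above to keep using a block mapping for THP-backed memory.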
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index 151c31fb9860..f9bb3b14130e 100644
--- a/arch/arm64/kvm/perf.c
+++ b/arch/arm64/kvm/perf.c
@@ -50,7 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
int kvm_perf_init(void)
{
- if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled())
+ if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
static_branch_enable(&kvm_arm_pmu_available);
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f33825c995cb..f5065f23b413 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -373,7 +373,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
- reg &= kvm_pmu_valid_counter_mask(vcpu);
}
return reg;
@@ -564,20 +563,21 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
*/
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
- unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
int i;
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
} else {
- kvm_pmu_disable_counter_mask(vcpu, mask);
+ kvm_pmu_disable_counter_mask(vcpu,
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
}
if (val & ARMV8_PMU_PMCR_C)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
+ unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_counter_value(vcpu, i, 0);
@@ -745,7 +745,7 @@ int kvm_pmu_probe_pmuver(void)
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu;
- int pmuver = 0xf;
+ int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
/*
* Create a dummy event that only counts user cycles. As we'll never
@@ -770,7 +770,7 @@ int kvm_pmu_probe_pmuver(void)
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
- return 0xf;
+ return ID_AA64DFR0_PMUVER_IMP_DEF;
}
if (event->pmu) {
@@ -923,7 +923,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!vcpu->kvm->arch.pmuver)
vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
- if (vcpu->kvm->arch.pmuver == 0xf)
+ if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
return -ENODEV;
switch (attr->attr) {
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index cba7872d69a8..08fd487d5f95 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -311,31 +311,26 @@ u32 get_kvm_ipa_limit(void)
int kvm_set_ipa_limit(void)
{
- unsigned int parange, tgran_2;
+ unsigned int parange;
u64 mmfr0;
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_PARANGE_SHIFT);
+ /*
+ * IPA size beyond 48 bits cannot be supported
+ * with either 4K or 16K page sizes. Hence let's cap
+ * it to 48 bits, in case it's reported as larger
+ * on the system.
+ */
+ if (PAGE_SIZE != SZ_64K)
+ parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
/*
* Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
* Stage-2. If not, things will stop very quickly.
*/
- switch (PAGE_SIZE) {
- default:
- case SZ_4K:
- tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
- break;
- case SZ_16K:
- tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
- break;
- case SZ_64K:
- tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
- break;
- }
-
- switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
+ switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
return -EINVAL;
@@ -369,7 +364,7 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
if (phys_shift) {
if (phys_shift > kvm_ipa_limit ||
- phys_shift < 32)
+ phys_shift < ARM64_MIN_PARANGE_BITS)
return -EINVAL;
} else {
phys_shift = KVM_PHYS_SHIFT;
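The parange cap is applied before the field is converted to a physical address width. A worked sketch of that flow (the PARange-to-bits table mirrors id_aa64mmfr0_parange_to_phys_shift() from the first hunk; the inputs are made up):

    #include <stdio.h>

    #define ID_AA64MMFR0_PARANGE_48 0x5
    #define ID_AA64MMFR0_PARANGE_52 0x6

    /* Mirrors id_aa64mmfr0_parange_to_phys_shift() */
    static unsigned int parange_to_phys_shift(unsigned int parange)
    {
            switch (parange) {
            case 0x0: return 32;
            case 0x1: return 36;
            case 0x2: return 40;
            case 0x3: return 42;
            case 0x4: return 44;
            case 0x5: return 48;
            case 0x6: return 52;
            default:  return 52;    /* unknown future value; illustrative only */
            }
    }

    int main(void)
    {
            unsigned int parange = ID_AA64MMFR0_PARANGE_52; /* pretend hardware */
            int have_64k_pages = 0;                         /* e.g. a 4K kernel */

            /* 52-bit IPAs need 64K pages, so cap the field before converting */
            if (!have_64k_pages && parange > ID_AA64MMFR0_PARANGE_48)
                    parange = ID_AA64MMFR0_PARANGE_48;

            printf("IPA limit candidate: %u bits\n",
                   parange_to_phys_shift(parange));
            return 0;
    }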
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f6f126eb6ac1..a1f5101f49a3 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -603,6 +603,41 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+ /* No PMU available, any PMU reg may UNDEF... */
+ if (!kvm_arm_support_pmu_v3())
+ return;
+
+ n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT;
+ n &= ARMV8_PMU_PMCR_N_MASK;
+ if (n)
+ mask |= GENMASK(n - 1, 0);
+
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= mask;
+}
+
+static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0);
+}
+
+static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK;
+}
+
+static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK;
+}
+
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 pmcr, val;
@@ -845,7 +880,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
kvm_pmu_disable_counter_mask(vcpu, val);
}
} else {
- p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
}
return true;
@@ -869,7 +904,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
/* accessing PMINTENCLR_EL1 */
__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
} else {
- p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
}
return true;
@@ -891,7 +926,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
/* accessing PMOVSCLR_EL0 */
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
} else {
- p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
}
return true;
@@ -944,16 +979,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
#define PMU_SYS_REG(r) \
- SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility
+ SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility
/* Macro to expand the PMEVCNTRn_EL0 register */
#define PMU_PMEVCNTR_EL0(n) \
{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
+ .reset = reset_pmevcntr, \
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
#define PMU_PMEVTYPER_EL0(n) \
{ PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \
+ .reset = reset_pmevtyper, \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -1249,6 +1286,20 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return __set_id_reg(vcpu, rd, uaddr, true);
}
+static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ int err;
+ u64 val;
+
+ /* Perform the access even if we are going to ignore the value */
+ err = reg_from_user(&val, uaddr, sys_reg_to_index(rd));
+ if (err)
+ return err;
+
+ return 0;
+}
+
static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1592,16 +1643,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
{ PMU_SYS_REG(SYS_PMOVSCLR_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
+ /*
+ * PMSWINC_EL0 is exposed to userspace as RAZ/WI, as it was
+ * (pointlessly) advertised in the past...
+ */
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
- .access = access_pmswinc, .reg = PMSWINC_EL0 },
+ .get_user = get_raz_id_reg, .set_user = set_wi_reg,
+ .access = access_pmswinc, .reset = NULL },
{ PMU_SYS_REG(SYS_PMSELR_EL0),
- .access = access_pmselr, .reg = PMSELR_EL0 },
+ .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
{ PMU_SYS_REG(SYS_PMCEID0_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCEID1_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
- .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 },
+ .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
.access = access_pmu_evtyper, .reset = NULL },
{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
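reset_pmu_reg() limits the UNKNOWN reset value to the counters that actually exist, as advertised by PMCR_EL0.N. A worked example of the mask computation (the constants are assumed to match the kernel's ARMv8 PMU definitions):

    #include <stdint.h>
    #include <stdio.h>

    #define ARMV8_PMU_CYCLE_IDX     31
    #define ARMV8_PMU_PMCR_N_SHIFT  11
    #define ARMV8_PMU_PMCR_N_MASK   0x1f

    #define BIT(n)        (1ULL << (n))
    #define GENMASK(h, l) (((~0ULL) >> (63 - (h))) & ~((1ULL << (l)) - 1))

    /* Mask of counters a guest may touch, derived from PMCR_EL0.N */
    static uint64_t pmu_reset_mask(uint64_t pmcr)
    {
            uint64_t n = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
            uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);

            if (n)
                    mask |= GENMASK(n - 1, 0);
            return mask;
    }

    int main(void)
    {
            /* A PMU with 6 event counters: bits [5:0] plus the cycle counter */
            printf("mask = 0x%llx\n", (unsigned long long)
                   pmu_reset_mask(6ULL << ARMV8_PMU_PMCR_N_SHIFT));  /* 0x8000003f */
            return 0;
    }

Because the stored PMCNTENSET/PMINTENSET/PMOVSSET values are now masked at reset and on writes, the "& mask" previously applied on every read can be dropped, as the hunks above do.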
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index a016f07adc28..5f9014ae595b 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -282,7 +282,7 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
case GIC_CPU_PRIMASK:
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
- * the PMR field as GICH_VMCR.VMPriMask rather than
+ * PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
@@ -329,7 +329,7 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
case GIC_CPU_PRIMASK:
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
- * the PMR field as GICH_VMCR.VMPriMask rather than
+ * PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 2c580204f1dc..95a18cec14a3 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
u32 val = cpuif->vgic_lr[lr];
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
+ bool deactivated;
/* Extract the source vCPU id from the LR */
cpuid = val & GICH_LR_PHYSID_CPUID;
@@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
raw_spin_lock(&irq->irq_lock);
- /* Always preserve the active bit */
+ /* Always preserve the active bit, note deactivation */
+ deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
if (irq->active && vgic_irq_is_sgi(intid))
@@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE))
irq->pending_latch = false;
- /*
- * Level-triggered mapped IRQs are special because we only
- * observe rising edges as input to the VGIC.
- *
- * If the guest never acked the interrupt we have to sample
- * the physical line and set the line level, because the
- * device state could have changed or we simply need to
- * process the still pending interrupt later.
- *
- * If this causes us to lower the level, we have to also clear
- * the physical active state, since we will otherwise never be
- * told when the interrupt becomes asserted again.
- *
- * Another case is when the interrupt requires a helping hand
- * on deactivation (no HW deactivation, for example).
- */
- if (vgic_irq_is_mapped_level(irq)) {
- bool resample = false;
-
- if (val & GICH_LR_PENDING_BIT) {
- irq->line_level = vgic_get_phys_line_level(irq);
- resample = !irq->line_level;
- } else if (vgic_irq_needs_resampling(irq) &&
- !(irq->active || irq->pending_latch)) {
- resample = true;
- }
-
- if (resample)
- vgic_irq_set_phys_active(irq, false);
- }
+ /* Handle resampling for mapped interrupts if required */
+ vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 66004f61cd83..21a6207fb2ee 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
u32 intid, cpuid;
struct vgic_irq *irq;
bool is_v2_sgi = false;
+ bool deactivated;
cpuid = val & GICH_LR_PHYSID_CPUID;
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
@@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
raw_spin_lock(&irq->irq_lock);
- /* Always preserve the active bit */
+ /* Always preserve the active bit, note deactivation */
+ deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
if (irq->active && is_v2_sgi)
@@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
irq->pending_latch = false;
- /*
- * Level-triggered mapped IRQs are special because we only
- * observe rising edges as input to the VGIC.
- *
- * If the guest never acked the interrupt we have to sample
- * the physical line and set the line level, because the
- * device state could have changed or we simply need to
- * process the still pending interrupt later.
- *
- * If this causes us to lower the level, we have to also clear
- * the physical active state, since we will otherwise never be
- * told when the interrupt becomes asserted again.
- *
- * Another case is when the interrupt requires a helping hand
- * on deactivation (no HW deactivation, for example).
- */
- if (vgic_irq_is_mapped_level(irq)) {
- bool resample = false;
-
- if (val & ICH_LR_PENDING_BIT) {
- irq->line_level = vgic_get_phys_line_level(irq);
- resample = !irq->line_level;
- } else if (vgic_irq_needs_resampling(irq) &&
- !(irq->active || irq->pending_latch)) {
- resample = true;
- }
-
- if (resample)
- vgic_irq_set_phys_active(irq, false);
- }
+ /* Handle resampling for mapped interrupts if required */
+ vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 111bff47e471..5dad4996cfb2 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -106,7 +106,6 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
if (intid >= VGIC_MIN_LPI)
return vgic_get_lpi(kvm, intid);
- WARN(1, "Looking up struct vgic_irq for reserved INTID");
return NULL;
}
@@ -1022,3 +1021,41 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
return map_is_active;
}
+
+/*
+ * Level-triggered mapped IRQs are special because we only observe rising
+ * edges as input to the VGIC.
+ *
+ * If the guest never acked the interrupt we have to sample the physical
+ * line and set the line level, because the device state could have changed
+ * or we simply need to process the still pending interrupt later.
+ *
+ * We could also have entered the guest with the interrupt active+pending.
+ * On the next exit, we need to re-evaluate the pending state, as it could
+ * otherwise result in a spurious interrupt by injecting a now potentially
+ * stale pending state.
+ *
+ * If this causes us to lower the level, we have to also clear the physical
+ * active state, since we will otherwise never be told when the interrupt
+ * becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand on
+ * deactivation (no HW deactivation, for example).
+ */
+void vgic_irq_handle_resampling(struct vgic_irq *irq,
+ bool lr_deactivated, bool lr_pending)
+{
+ if (vgic_irq_is_mapped_level(irq)) {
+ bool resample = false;
+
+ if (unlikely(vgic_irq_needs_resampling(irq))) {
+ resample = !(irq->active || irq->pending_latch);
+ } else if (lr_pending || (lr_deactivated && irq->line_level)) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+ resample = !irq->line_level;
+ }
+
+ if (resample)
+ vgic_irq_set_phys_active(irq, false);
+ }
+}
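For reference, a condensed stand-alone sketch of the decision the new helper makes (the struct fields are stand-ins for the relevant bits of struct vgic_irq, and the physical line level is passed in rather than read back from the GIC):

    #include <stdbool.h>
    #include <stdio.h>

    struct irq_state {
            bool mapped_level;      /* vgic_irq_is_mapped_level()  */
            bool needs_resample;    /* vgic_irq_needs_resampling() */
            bool active;
            bool pending_latch;
            bool line_level;
    };

    /* Returns true when the physical active state should be cleared */
    static bool should_resample(struct irq_state *irq, bool lr_deactivated,
                                bool lr_pending, bool phys_line_level)
    {
            if (!irq->mapped_level)
                    return false;

            if (irq->needs_resample)
                    return !(irq->active || irq->pending_latch);

            if (lr_pending || (lr_deactivated && irq->line_level)) {
                    irq->line_level = phys_line_level;  /* re-sample the line */
                    return !irq->line_level;
            }

            return false;
    }

    int main(void)
    {
            /* Guest deactivated a level IRQ whose line has since dropped */
            struct irq_state irq = { .mapped_level = true, .line_level = true };

            printf("resample: %d\n", should_resample(&irq, true, false, false));
            return 0;
    }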
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index dc1f3d1657ee..14a9218641f5 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags);
void vgic_kick_vcpus(struct kvm *kvm);
+void vgic_irq_handle_resampling(struct vgic_irq *irq,
+ bool lr_deactivated, bool lr_pending);
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
phys_addr_t addr, phys_addr_t alignment);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae7735b490b4..9818d271c2a1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -824,7 +824,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_get_pfn(kvm_pfn_t pfn);
void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5922031ffab6..1ace27c4a8e0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -633,43 +633,6 @@ static inline int PageTransCompound(struct page *page)
}
/*
- * PageTransCompoundMap is the same as PageTransCompound, but it also
- * guarantees the primary MMU has the entire compound page mapped
- * through pmd_trans_huge, which in turn guarantees the secondary MMUs
- * can also map the entire compound page. This allows the secondary
- * MMUs to call get_user_pages() only once for each compound page and
- * to immediately map the entire compound page with a single secondary
- * MMU fault. If there will be a pmd split later, the secondary MMUs
- * will get an update through the MMU notifier invalidation through
- * split_huge_pmd().
- *
- * Unlike PageTransCompound, this is safe to be called only while
- * split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount check false
- * positives.
- *
- * We have to treat page cache THP differently since every subpage of it
- * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE
- * mapped in the current process so comparing subpage's _mapcount to
- * compound_mapcount to filter out PTE mapped case.
- */
-static inline int PageTransCompoundMap(struct page *page)
-{
- struct page *head;
-
- if (!PageTransCompound(page))
- return 0;
-
- if (PageAnon(page))
- return atomic_read(&page->_mapcount) < 0;
-
- head = compound_head(page);
- /* File THP is PMD mapped and not PTE mapped */
- return atomic_read(&page->_mapcount) ==
- atomic_read(compound_mapcount_ptr(head));
-}
-
-/*
* PageTransTail returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known
* that hugetlbfs pages aren't involved.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d20fba0fc290..1d3a03c0fed3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -189,16 +189,6 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
return true;
}
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn)
-{
- struct page *page = pfn_to_page(pfn);
-
- if (!PageTransCompoundMap(page))
- return false;
-
- return is_transparent_hugepage(compound_head(page));
-}
-
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
@@ -2225,7 +2215,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
* Get a reference here because callers of *hva_to_pfn* and
* *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
* returned pfn. This is only needed if the VMA has VM_MIXEDMAP
- * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+ * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
* simply do nothing for reserved pfns.
*
* Whoever called remap_pfn_range is also going to call e.g.
@@ -2622,13 +2612,6 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn)
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
-void kvm_get_pfn(kvm_pfn_t pfn)
-{
- if (!kvm_is_reserved_pfn(pfn))
- get_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_get_pfn);
-
static int next_segment(unsigned long len, int offset)
{
if (len > PAGE_SIZE - offset)