diff options
-rw-r--r-- | Documentation/virt/kvm/api.rst | 22 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/arm.c | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/mmu.c | 1 | ||||
-rw-r--r-- | arch/arm64/kvm/pvtime.c | 29 | ||||
-rw-r--r-- | arch/arm64/kvm/trace_arm.h | 16 | ||||
-rw-r--r-- | arch/arm64/kvm/trace_handle_exit.h | 6 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 3 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 31 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 1 |
10 files changed, 77 insertions, 37 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eb3a1316f03e..d2b733dc7892 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6130,7 +6130,7 @@ HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx. 8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH ----------------------------------- -:Architecture: x86 +:Architectures: x86 This capability indicates that KVM running on top of Hyper-V hypervisor enables Direct TLB flush for its guests meaning that TLB flush @@ -6143,19 +6143,33 @@ in CPUID and only exposes Hyper-V identification. In this case, guest thinks it's running on Hyper-V and only use Hyper-V hypercalls. 8.22 KVM_CAP_S390_VCPU_RESETS +----------------------------- -Architectures: s390 +:Architectures: s390 This capability indicates that the KVM_S390_NORMAL_RESET and KVM_S390_CLEAR_RESET ioctls are available. 8.23 KVM_CAP_S390_PROTECTED +--------------------------- -Architecture: s390 - +:Architectures: s390 This capability indicates that the Ultravisor has been initialized and KVM can therefore start protected VMs. This capability governs the KVM_S390_PV_COMMAND ioctl and the KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected guests when the state change is invalid. + +8.24 KVM_CAP_STEAL_TIME +----------------------- + +:Architectures: arm64, x86 + +This capability indicates that KVM supports steal time accounting. +When steal time accounting is supported it may be enabled with +architecture-specific interfaces. This capability and the architecture- +specific interfaces must be consistent, i.e. if one says the feature +is supported, than the other should as well and vice versa. For arm64 +see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL". +For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME". diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41caf29bd93c..317bafde1442 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -368,7 +368,6 @@ struct kvm_vcpu_arch { /* Guest PV state */ struct { - u64 steal; u64 last_steal; gpa_t base; } steal; @@ -544,6 +543,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); void kvm_update_stolen_time(struct kvm_vcpu *vcpu); +bool kvm_arm_pvtime_supported(void); int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 262a0afbcc27..189b4c648cbb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -206,6 +206,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) */ r = 1; break; + case KVM_CAP_STEAL_TIME: + r = kvm_arm_pvtime_supported(); + break; default: r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 21b70abf65a7..e8a5179965d5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -785,6 +785,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { force_pte = true; vma_pagesize = PAGE_SIZE; + vma_shift = PAGE_SHIFT; } if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index f7b52ce1557e..920ac43077ad 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -13,25 +13,22 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - u64 steal; - __le64 steal_le; - u64 offset; - int idx; u64 base = vcpu->arch.steal.base; + u64 last_steal = vcpu->arch.steal.last_steal; + u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); + u64 steal = 0; + int idx; if (base == GPA_INVALID) return; - /* Let's do the local bookkeeping */ - steal = vcpu->arch.steal.steal; - steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal; - vcpu->arch.steal.last_steal = current->sched_info.run_delay; - vcpu->arch.steal.steal = steal; - - steal_le = cpu_to_le64(steal); idx = srcu_read_lock(&kvm->srcu); - offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); - kvm_put_guest(kvm, base + offset, steal_le, u64); + if (!kvm_get_guest(kvm, base + offset, steal)) { + steal = le64_to_cpu(steal); + vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); + steal += vcpu->arch.steal.last_steal - last_steal; + kvm_put_guest(kvm, base + offset, cpu_to_le64(steal)); + } srcu_read_unlock(&kvm->srcu, idx); } @@ -43,7 +40,8 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) switch (feature) { case ARM_SMCCC_HV_PV_TIME_FEATURES: case ARM_SMCCC_HV_PV_TIME_ST: - val = SMCCC_RET_SUCCESS; + if (vcpu->arch.steal.base != GPA_INVALID) + val = SMCCC_RET_SUCCESS; break; } @@ -64,7 +62,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) * Start counting stolen time from the time the guest requests * the feature enabled. */ - vcpu->arch.steal.steal = 0; vcpu->arch.steal.last_steal = current->sched_info.run_delay; idx = srcu_read_lock(&kvm->srcu); @@ -74,7 +71,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) return base; } -static bool kvm_arm_pvtime_supported(void) +bool kvm_arm_pvtime_supported(void) { return !!sched_info_on(); } diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 4691053c5ee4..ff0444352bba 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -23,7 +23,7 @@ TRACE_EVENT(kvm_entry, __entry->vcpu_pc = vcpu_pc; ), - TP_printk("PC: 0x%08lx", __entry->vcpu_pc) + TP_printk("PC: 0x%016lx", __entry->vcpu_pc) ); TRACE_EVENT(kvm_exit, @@ -42,7 +42,7 @@ TRACE_EVENT(kvm_exit, __entry->vcpu_pc = vcpu_pc; ), - TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx", + TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%016lx", __print_symbolic(__entry->ret, kvm_arm_exception_type), __entry->esr_ec, __print_symbolic(__entry->esr_ec, kvm_arm_exception_class), @@ -69,7 +69,7 @@ TRACE_EVENT(kvm_guest_fault, __entry->ipa = ipa; ), - TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#016lx", __entry->ipa, __entry->hsr, __entry->hxfar, __entry->vcpu_pc) ); @@ -131,7 +131,7 @@ TRACE_EVENT(kvm_mmio_emulate, __entry->cpsr = cpsr; ), - TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)", + TP_printk("Emulate MMIO at: 0x%016lx (instr: %08lx, cpsr: %08lx)", __entry->vcpu_pc, __entry->instr, __entry->cpsr) ); @@ -149,7 +149,7 @@ TRACE_EVENT(kvm_unmap_hva_range, __entry->end = end; ), - TP_printk("mmu notifier unmap range: %#08lx -- %#08lx", + TP_printk("mmu notifier unmap range: %#016lx -- %#016lx", __entry->start, __entry->end) ); @@ -165,7 +165,7 @@ TRACE_EVENT(kvm_set_spte_hva, __entry->hva = hva; ), - TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) + TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva) ); TRACE_EVENT(kvm_age_hva, @@ -182,7 +182,7 @@ TRACE_EVENT(kvm_age_hva, __entry->end = end; ), - TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + TP_printk("mmu notifier age hva: %#016lx -- %#016lx", __entry->start, __entry->end) ); @@ -198,7 +198,7 @@ TRACE_EVENT(kvm_test_age_hva, __entry->hva = hva; ), - TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) + TP_printk("mmu notifier test age hva: %#016lx", __entry->hva) ); TRACE_EVENT(kvm_set_way_flush, diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h index 2c56d1e0f5bd..8d78acc4fba7 100644 --- a/arch/arm64/kvm/trace_handle_exit.h +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -22,7 +22,7 @@ TRACE_EVENT(kvm_wfx_arm64, __entry->is_wfe = is_wfe; ), - TP_printk("guest executed wf%c at: 0x%08lx", + TP_printk("guest executed wf%c at: 0x%016lx", __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) ); @@ -42,7 +42,7 @@ TRACE_EVENT(kvm_hvc_arm64, __entry->imm = imm; ), - TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", + TP_printk("HVC at 0x%016lx (r0: 0x%016lx, imm: 0x%lx)", __entry->vcpu_pc, __entry->r0, __entry->imm) ); @@ -135,7 +135,7 @@ TRACE_EVENT(trap_reg, __entry->write_value = write_value; ), - TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) + TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) ); TRACE_EVENT(kvm_handle_sys_reg, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d39d6cf1d473..75270229a8bf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3578,6 +3578,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SMALLER_MAXPHYADDR: r = (int) allow_smaller_maxphyaddr; break; + case KVM_CAP_STEAL_TIME: + r = sched_info_on(); + break; default: break; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a23076765b4c..05e3c2fb3ef7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -749,25 +749,46 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); -#define __kvm_put_guest(kvm, gfn, offset, value, type) \ +#define __kvm_get_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ - type __user *__uaddr = (type __user *)(__addr + offset); \ + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ int __ret = -EFAULT; \ \ if (!kvm_is_error_hva(__addr)) \ - __ret = put_user(value, __uaddr); \ + __ret = get_user(v, __uaddr); \ + __ret; \ +}) + +#define kvm_get_guest(kvm, gpa, v) \ +({ \ + gpa_t __gpa = gpa; \ + struct kvm *__kvm = kvm; \ + \ + __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT, \ + offset_in_page(__gpa), v); \ +}) + +#define __kvm_put_guest(kvm, gfn, offset, v) \ +({ \ + unsigned long __addr = gfn_to_hva(kvm, gfn); \ + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ + int __ret = -EFAULT; \ + \ + if (!kvm_is_error_hva(__addr)) \ + __ret = put_user(v, __uaddr); \ if (!__ret) \ mark_page_dirty(kvm, gfn); \ __ret; \ }) -#define kvm_put_guest(kvm, gpa, value, type) \ +#define kvm_put_guest(kvm, gpa, v) \ ({ \ gpa_t __gpa = gpa; \ struct kvm *__kvm = kvm; \ + \ __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ - offset_in_page(__gpa), (value), type); \ + offset_in_page(__gpa), v); \ }) int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f6d86033c4fa..3d8023474f2a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1035,6 +1035,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_LAST_CPU 184 #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 +#define KVM_CAP_STEAL_TIME 187 #ifdef KVM_CAP_IRQ_ROUTING |