aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virt/kvm/api.rst22
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/kvm/arm.c3
-rw-r--r--arch/arm64/kvm/mmu.c1
-rw-r--r--arch/arm64/kvm/pvtime.c29
-rw-r--r--arch/arm64/kvm/trace_arm.h16
-rw-r--r--arch/arm64/kvm/trace_handle_exit.h6
-rw-r--r--arch/x86/kvm/x86.c3
-rw-r--r--include/linux/kvm_host.h31
-rw-r--r--include/uapi/linux/kvm.h1
10 files changed, 77 insertions, 37 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eb3a1316f03e..d2b733dc7892 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6130,7 +6130,7 @@ HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
-----------------------------------
-:Architecture: x86
+:Architectures: x86
This capability indicates that KVM running on top of Hyper-V hypervisor
enables Direct TLB flush for its guests meaning that TLB flush
@@ -6143,19 +6143,33 @@ in CPUID and only exposes Hyper-V identification. In this case, guest
thinks it's running on Hyper-V and only use Hyper-V hypercalls.
8.22 KVM_CAP_S390_VCPU_RESETS
+-----------------------------
-Architectures: s390
+:Architectures: s390
This capability indicates that the KVM_S390_NORMAL_RESET and
KVM_S390_CLEAR_RESET ioctls are available.
8.23 KVM_CAP_S390_PROTECTED
+---------------------------
-Architecture: s390
-
+:Architectures: s390
This capability indicates that the Ultravisor has been initialized and
KVM can therefore start protected VMs.
This capability governs the KVM_S390_PV_COMMAND ioctl and the
KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected
guests when the state change is invalid.
+
+8.24 KVM_CAP_STEAL_TIME
+-----------------------
+
+:Architectures: arm64, x86
+
+This capability indicates that KVM supports steal time accounting.
+When steal time accounting is supported it may be enabled with
+architecture-specific interfaces. This capability and the architecture-
+specific interfaces must be consistent, i.e. if one says the feature
+is supported, then the other should as well and vice versa.  For arm64
+see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL".
+For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME".
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 41caf29bd93c..317bafde1442 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -368,7 +368,6 @@ struct kvm_vcpu_arch {
/* Guest PV state */
struct {
- u64 steal;
u64 last_steal;
gpa_t base;
} steal;
@@ -544,6 +543,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
+bool kvm_arm_pvtime_supported(void);
int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 262a0afbcc27..189b4c648cbb 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -206,6 +206,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = 1;
break;
+ case KVM_CAP_STEAL_TIME:
+ r = kvm_arm_pvtime_supported();
+ break;
default:
r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
break;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 21b70abf65a7..e8a5179965d5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -785,6 +785,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
!fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
force_pte = true;
vma_pagesize = PAGE_SIZE;
+ vma_shift = PAGE_SHIFT;
}
if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c
index f7b52ce1557e..920ac43077ad 100644
--- a/arch/arm64/kvm/pvtime.c
+++ b/arch/arm64/kvm/pvtime.c
@@ -13,25 +13,22 @@
void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- u64 steal;
- __le64 steal_le;
- u64 offset;
- int idx;
u64 base = vcpu->arch.steal.base;
+ u64 last_steal = vcpu->arch.steal.last_steal;
+ u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
+ u64 steal = 0;
+ int idx;
if (base == GPA_INVALID)
return;
- /* Let's do the local bookkeeping */
- steal = vcpu->arch.steal.steal;
- steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
- vcpu->arch.steal.last_steal = current->sched_info.run_delay;
- vcpu->arch.steal.steal = steal;
-
- steal_le = cpu_to_le64(steal);
idx = srcu_read_lock(&kvm->srcu);
- offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
- kvm_put_guest(kvm, base + offset, steal_le, u64);
+ if (!kvm_get_guest(kvm, base + offset, steal)) {
+ steal = le64_to_cpu(steal);
+ vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay);
+ steal += vcpu->arch.steal.last_steal - last_steal;
+ kvm_put_guest(kvm, base + offset, cpu_to_le64(steal));
+ }
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -43,7 +40,8 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
switch (feature) {
case ARM_SMCCC_HV_PV_TIME_FEATURES:
case ARM_SMCCC_HV_PV_TIME_ST:
- val = SMCCC_RET_SUCCESS;
+ if (vcpu->arch.steal.base != GPA_INVALID)
+ val = SMCCC_RET_SUCCESS;
break;
}
@@ -64,7 +62,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
* Start counting stolen time from the time the guest requests
* the feature enabled.
*/
- vcpu->arch.steal.steal = 0;
vcpu->arch.steal.last_steal = current->sched_info.run_delay;
idx = srcu_read_lock(&kvm->srcu);
@@ -74,7 +71,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
return base;
}
-static bool kvm_arm_pvtime_supported(void)
+bool kvm_arm_pvtime_supported(void)
{
return !!sched_info_on();
}
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 4691053c5ee4..ff0444352bba 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -23,7 +23,7 @@ TRACE_EVENT(kvm_entry,
__entry->vcpu_pc = vcpu_pc;
),
- TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+ TP_printk("PC: 0x%016lx", __entry->vcpu_pc)
);
TRACE_EVENT(kvm_exit,
@@ -42,7 +42,7 @@ TRACE_EVENT(kvm_exit,
__entry->vcpu_pc = vcpu_pc;
),
- TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+ TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%016lx",
__print_symbolic(__entry->ret, kvm_arm_exception_type),
__entry->esr_ec,
__print_symbolic(__entry->esr_ec, kvm_arm_exception_class),
@@ -69,7 +69,7 @@ TRACE_EVENT(kvm_guest_fault,
__entry->ipa = ipa;
),
- TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+ TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#016lx",
__entry->ipa, __entry->hsr,
__entry->hxfar, __entry->vcpu_pc)
);
@@ -131,7 +131,7 @@ TRACE_EVENT(kvm_mmio_emulate,
__entry->cpsr = cpsr;
),
- TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+ TP_printk("Emulate MMIO at: 0x%016lx (instr: %08lx, cpsr: %08lx)",
__entry->vcpu_pc, __entry->instr, __entry->cpsr)
);
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_unmap_hva_range,
__entry->end = end;
),
- TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
+ TP_printk("mmu notifier unmap range: %#016lx -- %#016lx",
__entry->start, __entry->end)
);
@@ -165,7 +165,7 @@ TRACE_EVENT(kvm_set_spte_hva,
__entry->hva = hva;
),
- TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
+ TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva)
);
TRACE_EVENT(kvm_age_hva,
@@ -182,7 +182,7 @@ TRACE_EVENT(kvm_age_hva,
__entry->end = end;
),
- TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+ TP_printk("mmu notifier age hva: %#016lx -- %#016lx",
__entry->start, __entry->end)
);
@@ -198,7 +198,7 @@ TRACE_EVENT(kvm_test_age_hva,
__entry->hva = hva;
),
- TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+ TP_printk("mmu notifier test age hva: %#016lx", __entry->hva)
);
TRACE_EVENT(kvm_set_way_flush,
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
index 2c56d1e0f5bd..8d78acc4fba7 100644
--- a/arch/arm64/kvm/trace_handle_exit.h
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -22,7 +22,7 @@ TRACE_EVENT(kvm_wfx_arm64,
__entry->is_wfe = is_wfe;
),
- TP_printk("guest executed wf%c at: 0x%08lx",
+ TP_printk("guest executed wf%c at: 0x%016lx",
__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
);
@@ -42,7 +42,7 @@ TRACE_EVENT(kvm_hvc_arm64,
__entry->imm = imm;
),
- TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
+ TP_printk("HVC at 0x%016lx (r0: 0x%016lx, imm: 0x%lx)",
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
@@ -135,7 +135,7 @@ TRACE_EVENT(trap_reg,
__entry->write_value = write_value;
),
- TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
+ TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
);
TRACE_EVENT(kvm_handle_sys_reg,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d39d6cf1d473..75270229a8bf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3578,6 +3578,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SMALLER_MAXPHYADDR:
r = (int) allow_smaller_maxphyaddr;
break;
+ case KVM_CAP_STEAL_TIME:
+ r = sched_info_on();
+ break;
default:
break;
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a23076765b4c..05e3c2fb3ef7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -749,25 +749,46 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len);
-#define __kvm_put_guest(kvm, gfn, offset, value, type) \
+#define __kvm_get_guest(kvm, gfn, offset, v) \
({ \
unsigned long __addr = gfn_to_hva(kvm, gfn); \
- type __user *__uaddr = (type __user *)(__addr + offset); \
+ typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
int __ret = -EFAULT; \
\
if (!kvm_is_error_hva(__addr)) \
- __ret = put_user(value, __uaddr); \
+ __ret = get_user(v, __uaddr); \
+ __ret; \
+})
+
+#define kvm_get_guest(kvm, gpa, v) \
+({ \
+ gpa_t __gpa = gpa; \
+ struct kvm *__kvm = kvm; \
+ \
+ __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT, \
+ offset_in_page(__gpa), v); \
+})
+
+#define __kvm_put_guest(kvm, gfn, offset, v) \
+({ \
+ unsigned long __addr = gfn_to_hva(kvm, gfn); \
+ typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
+ int __ret = -EFAULT; \
+ \
+ if (!kvm_is_error_hva(__addr)) \
+ __ret = put_user(v, __uaddr); \
if (!__ret) \
mark_page_dirty(kvm, gfn); \
__ret; \
})
-#define kvm_put_guest(kvm, gpa, value, type) \
+#define kvm_put_guest(kvm, gpa, v) \
({ \
gpa_t __gpa = gpa; \
struct kvm *__kvm = kvm; \
+ \
__kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \
- offset_in_page(__gpa), (value), type); \
+ offset_in_page(__gpa), v); \
})
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f6d86033c4fa..3d8023474f2a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1035,6 +1035,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_LAST_CPU 184
#define KVM_CAP_SMALLER_MAXPHYADDR 185
#define KVM_CAP_S390_DIAG318 186
+#define KVM_CAP_STEAL_TIME 187
#ifdef KVM_CAP_IRQ_ROUTING