aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm/vmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--arch/x86/kvm/vmx.c172
1 files changed, 93 insertions, 79 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a45d8580f91e..f9939d0722b4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -422,6 +422,7 @@ struct nested_vmx {
struct list_head vmcs02_pool;
int vmcs02_num;
u64 vmcs01_tsc_offset;
+ bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
/*
@@ -435,6 +436,8 @@ struct nested_vmx {
bool pi_pending;
u16 posted_intr_nv;
+ unsigned long *msr_bitmap;
+
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -924,7 +927,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;
@@ -937,6 +939,7 @@ static DEFINE_SPINLOCK(vmx_vpid_lock);
static struct vmcs_config {
int size;
int order;
+ u32 basic_cap;
u32 revision_id;
u32 pin_based_exec_ctrl;
u32 cpu_based_exec_ctrl;
@@ -1213,6 +1216,11 @@ static inline bool cpu_has_vmx_ple(void)
SECONDARY_EXEC_PAUSE_LOOP_EXITING;
}
+static inline bool cpu_has_vmx_basic_inout(void)
+{
+ return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
+}
+
static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
{
return flexpriority_enabled && lapic_in_kernel(vcpu);
@@ -2198,6 +2206,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
new.control) != old.control);
}
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+ vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+ vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
@@ -2256,10 +2270,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* Setup TSC multiplier */
if (kvm_has_tsc_control &&
- vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
- vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
- vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
- }
+ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+ decache_tsc_multiplier(vmx);
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
@@ -2508,7 +2520,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
unsigned long *msr_bitmap;
if (is_guest_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_nested;
+ msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
else if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -2871,6 +2883,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
*pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+ if (cpu_has_vmx_basic_inout())
+ *pdata |= VMX_BASIC_INOUT;
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
@@ -3451,7 +3465,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
return -EIO;
vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->order = get_order(vmcs_config.size);
+ vmcs_conf->order = get_order(vmcs_conf->size);
+ vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
vmcs_conf->revision_id = vmx_msr_low;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
@@ -6103,7 +6118,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
gla_validity = (exit_qualification >> 7) & 0x3;
- if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
+ if (gla_validity == 0x2) {
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
@@ -6363,13 +6378,6 @@ static __init int hardware_setup(void)
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
- if (nested) {
- vmx_msr_bitmap_nested =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_nested)
- goto out5;
- }
-
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
goto out6;
@@ -6392,8 +6400,6 @@ static __init int hardware_setup(void)
memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
- if (nested)
- memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
@@ -6529,9 +6535,6 @@ out8:
out7:
free_page((unsigned long)vmx_vmread_bitmap);
out6:
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6560,6 @@ static __exit void hardware_unsetup(void)
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
free_page((unsigned long)vmx_vmread_bitmap);
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
free_kvm_area();
}
@@ -6734,7 +6735,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
/* TODO: not to reset guest simply here. */
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- pr_warn("kvm: nested vmx abort, indicator %d\n", indicator);
+ pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
}
static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
@@ -6995,16 +6996,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
+ if (cpu_has_vmx_msr_bitmap()) {
+ vmx->nested.msr_bitmap =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx->nested.msr_bitmap)
+ goto out_msr_bitmap;
+ }
+
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
- return -ENOMEM;
+ goto out_cached_vmcs12;
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
- if (!shadow_vmcs) {
- kfree(vmx->nested.cached_vmcs12);
- return -ENOMEM;
- }
+ if (!shadow_vmcs)
+ goto out_shadow_vmcs;
/* mark vmcs as shadow */
shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */
@@ -7016,7 +7022,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
vmx->nested.vmcs02_num = 0;
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
vmx->nested.vmxon = true;
@@ -7024,6 +7030,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
return 1;
+
+out_shadow_vmcs:
+ kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+ return -ENOMEM;
}
/*
@@ -7098,6 +7113,10 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
+ if (vmx->nested.msr_bitmap) {
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+ vmx->nested.msr_bitmap = NULL;
+ }
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8438,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
u32 sec_exec_control;
+ /* Postpone execution until vmcs01 is the current VMCS. */
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+ return;
+ }
+
/*
* There is not point to enable virtualize x2apic without enable
* apicv
@@ -9472,8 +9497,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
{
int msr;
struct page *page;
- unsigned long *msr_bitmap;
+ unsigned long *msr_bitmap_l1;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ /* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;
@@ -9482,63 +9509,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
WARN_ON(1);
return false;
}
- msr_bitmap = (unsigned long *)kmap(page);
- if (!msr_bitmap) {
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+ if (!msr_bitmap_l1) {
nested_release_page_clean(page);
WARN_ON(1);
return false;
}
+ memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
if (nested_cpu_has_apic_reg_virt(vmcs12))
for (msr = 0x800; msr <= 0x8ff; msr++)
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
msr, MSR_TYPE_R);
- /* TPR is allowed */
- nested_vmx_disable_intercept_for_msr(msr_bitmap,
- vmx_msr_bitmap_nested,
+
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_TASKPRI >> 4),
MSR_TYPE_R | MSR_TYPE_W);
+
if (nested_cpu_has_vid(vmcs12)) {
- /* EOI and self-IPI are allowed */
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_EOI >> 4),
MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
MSR_TYPE_W);
}
- } else {
- /*
- * Enable reading intercept of all the x2apic
- * MSRs. We should not rely on vmcs12 to do any
- * optimizations here, it may have been modified
- * by L1.
- */
- for (msr = 0x800; msr <= 0x8ff; msr++)
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- msr,
- MSR_TYPE_R);
-
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
}
kunmap(page);
nested_release_page_clean(page);
@@ -9606,7 +9607,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
maxphyaddr = cpuid_maxphyaddr(vcpu);
if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
(addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
addr_field, maxphyaddr, count, addr);
return -EINVAL;
@@ -9679,13 +9680,13 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
for (i = 0; i < count; i++) {
if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
&e, sizeof(e))) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot read MSR entry (%u, 0x%08llx)\n",
__func__, i, gpa + i * sizeof(e));
goto fail;
}
if (nested_vmx_load_msr_check(vcpu, &e)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s check failed (%u, 0x%x, 0x%x)\n",
__func__, i, e.index, e.reserved);
goto fail;
@@ -9693,7 +9694,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
msr.index = e.index;
msr.data = e.value;
if (kvm_set_msr(vcpu, &msr)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, e.value);
goto fail;
@@ -9714,13 +9715,13 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
if (kvm_vcpu_read_guest(vcpu,
gpa + i * sizeof(e),
&e, 2 * sizeof(u32))) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot read MSR entry (%u, 0x%08llx)\n",
__func__, i, gpa + i * sizeof(e));
return -EINVAL;
}
if (nested_vmx_store_msr_check(vcpu, &e)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s check failed (%u, 0x%x, 0x%x)\n",
__func__, i, e.index, e.reserved);
return -EINVAL;
@@ -9728,7 +9729,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
msr_info.host_initiated = false;
msr_info.index = e.index;
if (kvm_get_msr(vcpu, &msr_info)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot read MSR (%u, 0x%x)\n",
__func__, i, e.index);
return -EINVAL;
@@ -9737,7 +9738,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
gpa + i * sizeof(e) +
offsetof(struct vmx_msr_entry, value),
&msr_info.data, sizeof(msr_info.data))) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, msr_info.data);
return -EINVAL;
@@ -9957,10 +9958,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
if (cpu_has_vmx_msr_bitmap() &&
- exec_control & CPU_BASED_USE_MSR_BITMAPS) {
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
- /* MSR_BITMAP will be set by following vmx_set_efer. */
- } else
+ exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+ nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+ ; /* MSR_BITMAP will be set by following vmx_set_efer. */
+ else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
/*
@@ -10011,6 +10012,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
else
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
if (enable_vpid) {
/*
@@ -10506,6 +10509,9 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
}
+ if (nested_cpu_has_ept(vmcs12))
+ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+
if (nested_cpu_has_vid(vmcs12))
vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
@@ -10767,6 +10773,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
else
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
+
+ if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+ vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+ vmx_set_virtual_x2apic_mode(vcpu,
+ vcpu->arch.apic_base & X2APIC_ENABLE);
+ }
/* This is needed for same reason as it was needed in prepare_vmcs02 */
vmx->host_rsp = 0;