aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/cpuid.c24
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/mmu.c8
-rw-r--r--arch/x86/kvm/mtrr.c1
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c194
-rw-r--r--arch/x86/kvm/x86.c59
-rw-r--r--arch/x86/kvm/x86.h7
9 files changed, 161 insertions, 168 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 769af907f824..643565364497 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -181,19 +181,22 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry __user *entries)
{
int r, i;
- struct kvm_cpuid_entry *cpuid_entries;
+ struct kvm_cpuid_entry *cpuid_entries = NULL;
r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
goto out;
r = -ENOMEM;
- cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
- if (!cpuid_entries)
- goto out;
- r = -EFAULT;
- if (copy_from_user(cpuid_entries, entries,
- cpuid->nent * sizeof(struct kvm_cpuid_entry)))
- goto out_free;
+ if (cpuid->nent) {
+ cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) *
+ cpuid->nent);
+ if (!cpuid_entries)
+ goto out;
+ r = -EFAULT;
+ if (copy_from_user(cpuid_entries, entries,
+ cpuid->nent * sizeof(struct kvm_cpuid_entry)))
+ goto out;
+ }
for (i = 0; i < cpuid->nent; i++) {
vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
@@ -212,9 +215,8 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
kvm_x86_ops->cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
-out_free:
- vfree(cpuid_entries);
out:
+ vfree(cpuid_entries);
return r;
}
@@ -364,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
- F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+ F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index e17a74b1d852..35058c2c0eea 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
-static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
-}
-
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bbb5b283ff63..a397200281c1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
if (guest_tsc < tsc_deadline)
- __delay(tsc_deadline - guest_tsc);
+ __delay(min(tsc_deadline - guest_tsc,
+ nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
}
static void start_apic_timer(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 24e800116ab4..def97b3a392b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -336,12 +336,12 @@ static gfn_t pse36_gfn_delta(u32 gpte)
#ifdef CONFIG_X86_64
static void __set_spte(u64 *sptep, u64 spte)
{
- *sptep = spte;
+ WRITE_ONCE(*sptep, spte);
}
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
{
- *sptep = spte;
+ WRITE_ONCE(*sptep, spte);
}
static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
@@ -390,7 +390,7 @@ static void __set_spte(u64 *sptep, u64 spte)
*/
smp_wmb();
- ssptep->spte_low = sspte.spte_low;
+ WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
}
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
@@ -400,7 +400,7 @@ static void __update_clear_spte_fast(u64 *sptep, u64 spte)
ssptep = (union split_spte *)sptep;
sspte = (union split_spte)spte;
- ssptep->spte_low = sspte.spte_low;
+ WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
/*
* If we map the spte from present to nonpresent, we should clear
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index c146f3c262c3..0149ac59c273 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -539,6 +539,7 @@ static void mtrr_lookup_var_start(struct mtrr_iter *iter)
iter->fixed = false;
iter->start_max = iter->start;
+ iter->range = NULL;
iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);
__mtrr_lookup_var_next(iter);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2214214c786b..16ef31b87452 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -84,7 +84,7 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define TSC_RATIO_MIN 0x0000000000000001ULL
#define TSC_RATIO_MAX 0x000000ffffffffffULL
-#define AVIC_HPA_MASK ~((0xFFFULL << 52) || 0xFFF)
+#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
/*
* 0xff is broadcast, so the max index allowed for physical APIC ID
@@ -238,7 +238,9 @@ module_param(nested, int, S_IRUGO);
/* enable / disable AVIC */
static int avic;
+#ifdef CONFIG_X86_LOCAL_APIC
module_param(avic, int, S_IRUGO);
+#endif
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -981,11 +983,14 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
- if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
- avic = false;
-
- if (avic)
- pr_info("AVIC enabled\n");
+ if (avic) {
+ if (!npt_enabled ||
+ !boot_cpu_has(X86_FEATURE_AVIC) ||
+ !IS_ENABLED(CONFIG_X86_LOCAL_APIC))
+ avic = false;
+ else
+ pr_info("AVIC enabled\n");
+ }
return 0;
@@ -1324,7 +1329,7 @@ free_avic:
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
u64 entry;
- int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu);
+ int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
if (!kvm_vcpu_apicv_active(vcpu))
@@ -1349,7 +1354,7 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
/* ID = 0xff (broadcast), ID > 0xff (reserved) */
- int h_physical_id = __default_cpu_present_to_apicid(cpu);
+ int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
if (!kvm_vcpu_apicv_active(vcpu))
@@ -3597,7 +3602,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
u32 icrl = svm->vmcb->control.exit_info_1;
u32 id = svm->vmcb->control.exit_info_2 >> 32;
- u32 index = svm->vmcb->control.exit_info_2 && 0xFF;
+ u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
@@ -4236,7 +4241,7 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
if (avic_vcpu_is_running(vcpu))
wrmsrl(SVM_AVIC_DOORBELL,
- __default_cpu_present_to_apicid(vcpu->cpu));
+ kvm_cpu_get_apicid(vcpu->cpu));
else
kvm_vcpu_wake_up(vcpu);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e605d1ed334f..df07a0a4611f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2072,7 +2072,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
unsigned int dest;
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP))
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(vcpu))
return;
do {
@@ -2180,7 +2181,8 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP))
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(vcpu))
return;
/* Set SN when the vCPU is preempted */
@@ -2418,7 +2420,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_nested;
- else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
+ else if (cpu_has_secondary_exec_ctrls() &&
+ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
@@ -2703,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
- SECONDARY_EXEC_XSAVES |
- SECONDARY_EXEC_PCOMMIT;
+ SECONDARY_EXEC_XSAVES;
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
@@ -3266,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_PCOMMIT |
SECONDARY_EXEC_TSC_SCALING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -4787,6 +4789,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ if (cpu_has_secondary_exec_ctrls()) {
+ if (kvm_vcpu_apicv_active(vcpu))
+ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+ else
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+ }
+
+ if (cpu_has_vmx_msr_bitmap())
+ vmx_set_msr_bitmap(vcpu);
}
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -4841,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_pml)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
- /* Currently, we allow L1 guest to directly run pcommit instruction. */
- exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
return exec_control;
}
@@ -4887,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
- if (cpu_has_secondary_exec_ctrls())
+ if (cpu_has_secondary_exec_ctrls()) {
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
vmx_secondary_exec_control(vmx));
+ }
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
vmcs_write64(EOI_EXIT_BITMAP0, 0);
@@ -4962,6 +4975,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (vmx_xsaves_supported())
vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+ if (enable_pml) {
+ ASSERT(vmx->pml_pg);
+ vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ }
+
return 0;
}
@@ -6333,23 +6352,20 @@ static __init int hardware_setup(void)
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
- if (enable_apicv) {
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr);
-
- /* According SDM, in x2apic mode, the whole id reg is used.
- * But in KVM, it only use the highest eight bits. Need to
- * intercept it */
- vmx_enable_intercept_msr_read_x2apic(0x802);
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808);
- /* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b);
- /* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f);
- }
+ for (msr = 0x800; msr <= 0x8ff; msr++)
+ vmx_disable_intercept_msr_read_x2apic(msr);
+
+ /* According SDM, in x2apic mode, the whole id reg is used. But in
+ * KVM, it only use the highest eight bits. Need to intercept it */
+ vmx_enable_intercept_msr_read_x2apic(0x802);
+ /* TMCCT */
+ vmx_enable_intercept_msr_read_x2apic(0x839);
+ /* TPR */
+ vmx_disable_intercept_msr_write_x2apic(0x808);
+ /* EOI */
+ vmx_disable_intercept_msr_write_x2apic(0x80b);
+ /* SELF-IPI */
+ vmx_disable_intercept_msr_write_x2apic(0x83f);
if (enable_ept) {
kvm_mmu_set_mask_ptes(0ull,
@@ -6657,7 +6673,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
/* Checks for #GP/#SS exceptions. */
exn = false;
- if (is_protmode(vcpu)) {
+ if (is_long_mode(vcpu)) {
+ /* Long mode: #GP(0)/#SS(0) if the memory address is in a
+ * non-canonical form. This is the only check on the memory
+ * destination for long mode!
+ */
+ exn = is_noncanonical_address(*ret);
+ } else if (is_protmode(vcpu)) {
/* Protected mode: apply checks for segment validity in the
* following order:
* - segment type check (#GP(0) may be thrown)
@@ -6674,17 +6696,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
* execute-only code segment
*/
exn = ((s.type & 0xa) == 8);
- }
- if (exn) {
- kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
- return 1;
- }
- if (is_long_mode(vcpu)) {
- /* Long mode: #GP(0)/#SS(0) if the memory address is in a
- * non-canonical form. This is an only check for long mode.
- */
- exn = is_noncanonical_address(*ret);
- } else if (is_protmode(vcpu)) {
+ if (exn) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+ return 1;
+ }
/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
*/
exn = (s.unusable != 0);
@@ -7545,13 +7560,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
- /* we never catch pcommit instruct for L1 guest. */
- WARN_ON(1);
- return 1;
-}
-
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7602,7 +7610,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
- [EXIT_REASON_PCOMMIT] = handle_pcommit,
};
static const int kvm_vmx_max_exit_handlers =
@@ -7911,8 +7918,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
- case EXIT_REASON_PCOMMIT:
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
default:
return true;
}
@@ -7924,22 +7929,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
-static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
-{
- struct page *pml_pg;
-
- pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!pml_pg)
- return -ENOMEM;
-
- vmx->pml_pg = pml_pg;
-
- vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-
- return 0;
-}
-
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
@@ -8211,6 +8200,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
+ exit_reason != EXIT_REASON_PML_FULL &&
exit_reason != EXIT_REASON_TASK_SWITCH)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
@@ -8841,6 +8831,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
put_cpu();
}
+/*
+ * Ensure that the current vmcs of the logical processor is the
+ * vmcs01 of the vcpu before calling free_nested().
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int r;
+
+ r = vcpu_load(vcpu);
+ BUG_ON(r);
+ vmx_load_vmcs01(vcpu);
+ free_nested(vmx);
+ vcpu_put(vcpu);
+}
+
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8849,8 +8855,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
vmx_destroy_pml_buffer(vmx);
free_vpid(vmx->vpid);
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
- free_nested(vmx);
+ vmx_free_vcpu_nested(vcpu);
free_loaded_vmcs(vmx->loaded_vmcs);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
@@ -8872,14 +8877,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ err = -ENOMEM;
+
+ /*
+ * If PML is turned on, failure on enabling PML just results in failure
+ * of creating the vcpu, therefore we can simplify PML logic (by
+ * avoiding dealing with cases, such as enabling PML partially on vcpus
+ * for the guest, etc.
+ */
+ if (enable_pml) {
+ vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vmx->pml_pg)
+ goto uninit_vcpu;
+ }
+
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
> PAGE_SIZE);
- err = -ENOMEM;
- if (!vmx->guest_msrs) {
- goto uninit_vcpu;
- }
+ if (!vmx->guest_msrs)
+ goto free_pml;
vmx->loaded_vmcs = &vmx->vmcs01;
vmx->loaded_vmcs->vmcs = alloc_vmcs();
@@ -8923,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
- /*
- * If PML is turned on, failure on enabling PML just results in failure
- * of creating the vcpu, therefore we can simplify PML logic (by
- * avoiding dealing with cases, such as enabling PML partially on vcpus
- * for the guest, etc.
- */
- if (enable_pml) {
- err = vmx_create_pml_buffer(vmx);
- if (err)
- goto free_vmcs;
- }
-
return &vmx->vcpu;
free_vmcs:
@@ -8942,6 +8947,8 @@ free_vmcs:
free_loaded_vmcs(vmx->loaded_vmcs);
free_msrs:
kfree(vmx->guest_msrs);
+free_pml:
+ vmx_destroy_pml_buffer(vmx);
uninit_vcpu:
kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
@@ -9073,15 +9080,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls())
vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
- if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
- if (guest_cpuid_has_pcommit(vcpu))
- vmx->nested.nested_vmx_secondary_ctls_high |=
- SECONDARY_EXEC_PCOMMIT;
- else
- vmx->nested.nested_vmx_secondary_ctls_high &=
- ~SECONDARY_EXEC_PCOMMIT;
- }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9694,8 +9692,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_PCOMMIT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
@@ -10702,7 +10699,8 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP))
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(vcpu))
return 0;
vcpu->pre_pcpu = vcpu->cpu;
@@ -10768,7 +10766,8 @@ static void vmx_post_block(struct kvm_vcpu *vcpu)
unsigned long flags;
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP))
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(vcpu))
return;
do {
@@ -10821,7 +10820,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
int idx, ret = -EINVAL;
if (!kvm_arch_has_assigned_device(kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP))
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(kvm->vcpus[0]))
return 0;
idx = srcu_read_lock(&kvm->irq_srcu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c805cf494154..45608a7da9b3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -55,9 +55,6 @@
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
@@ -68,6 +65,9 @@
#include <asm/div64.h>
#include <asm/irq_remapping.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
static unsigned long max_tsc_khz;
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
-{
- return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
- vcpu->arch.virtual_tsc_shift);
-}
-
static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
u64 v = (u64)khz * (1000000 + ppm);
@@ -2314,6 +2308,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
case MSR_AMD64_BU_CFG2:
+ case MSR_IA32_PERF_CTL:
msr_info->data = 0;
break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2972,6 +2967,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SMM))
return -EINVAL;
+ if (events->exception.injected &&
+ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
+ return -EINVAL;
+
process_nmi(vcpu);
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.nr = events->exception.nr;
@@ -3036,6 +3035,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
if (dbgregs->flags)
return -EINVAL;
+ if (dbgregs->dr6 & ~0xffffffffull)
+ return -EINVAL;
+ if (dbgregs->dr7 & ~0xffffffffull)
+ return -EINVAL;
+
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = dbgregs->dr6;
@@ -5548,9 +5552,10 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
-static void tsc_bad(void *info)
+static int kvmclock_cpu_down_prep(unsigned int cpu)
{
__this_cpu_write(cpu_tsc_khz, 0);
+ return 0;
}
static void tsc_khz_changed(void *data)
@@ -5655,35 +5660,18 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
-static int kvmclock_cpu_notifier(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int kvmclock_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
- break;
- case CPU_DOWN_PREPARE:
- smp_call_function_single(cpu, tsc_bad, NULL, 1);
- break;
- }
- return NOTIFY_OK;
+ tsc_khz_changed(NULL);
+ return 0;
}
-static struct notifier_block kvmclock_cpu_notifier_block = {
- .notifier_call = kvmclock_cpu_notifier,
- .priority = -INT_MAX
-};
-
static void kvm_timer_init(void)
{
int cpu;
max_tsc_khz = tsc_khz;
- cpu_notifier_register_begin();
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy policy;
@@ -5698,12 +5686,9 @@ static void kvm_timer_init(void)
CPUFREQ_TRANSITION_NOTIFIER);
}
pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
- for_each_online_cpu(cpu)
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-
- __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
- cpu_notifier_register_done();
+ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+ kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -5892,7 +5877,7 @@ void kvm_arch_exit(void)
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
- unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+ cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
#endif
@@ -7815,7 +7800,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
slot = id_to_memslot(slots, id);
if (size) {
- if (WARN_ON(slot->npages))
+ if (slot->npages)
return -EEXIST;
/*
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7ce3634ab5fe..a82ca466b62e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
#define ARCH_X86_KVM_X86_H
#include <linux/kvm_host.h>
+#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
@@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns;
extern struct static_key kvm_no_apic_vcpu;
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+{
+ return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
+}
+
/* Same "calling convention" as do_div:
* - divide (n << 32) by base
* - put result in n