Diffstat (limited to 'arch/x86/kvm/svm/avic.c')
-rw-r--r-- | arch/x86/kvm/svm/avic.c | 370
1 file changed, 165 insertions, 205 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index f52f5e0dd465..b3928150a37c 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -53,7 +53,7 @@ static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
 static u32 next_vm_id = 0;
 static bool next_vm_id_wrapped = 0;
 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
-enum avic_modes avic_mode;
+bool x2avic_enabled;
 
 /*
  * This is a wrapper of struct amd_iommu_ir_data.
@@ -72,20 +72,25 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
 
 	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 
-	/* Note:
-	 * KVM can support hybrid-AVIC mode, where KVM emulates x2APIC
-	 * MSR accesses, while interrupt injection to a running vCPU
-	 * can be achieved using AVIC doorbell. The AVIC hardware still
-	 * accelerate MMIO accesses, but this does not cause any harm
-	 * as the guest is not supposed to access xAPIC mmio when uses x2APIC.
+	/*
+	 * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
+	 * accesses, while interrupt injection to a running vCPU can be
+	 * achieved using AVIC doorbell. KVM disables the APIC access page
+	 * (deletes the memslot) if any vCPU has x2APIC enabled, thus enabling
+	 * AVIC in hybrid mode activates only the doorbell mechanism.
 	 */
-	if (apic_x2apic_mode(svm->vcpu.arch.apic) &&
-	    avic_mode == AVIC_MODE_X2) {
+	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
 		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
 		vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
 		/* Disabling MSR intercept for x2APIC registers */
 		svm_set_x2apic_msr_interception(svm, false);
 	} else {
+		/*
+		 * Flush the TLB, the guest may have inserted a non-APIC
+		 * mapping into the TLB while AVIC was disabled.
+		 */
+		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
+
 		/* For xAVIC and hybrid-xAVIC modes */
 		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
 		/* Enabling MSR intercept for x2APIC registers */
@@ -241,8 +246,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 	u64 *avic_physical_id_table;
 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 
-	if ((avic_mode == AVIC_MODE_X1 && index > AVIC_MAX_PHYSICAL_ID) ||
-	    (avic_mode == AVIC_MODE_X2 && index > X2AVIC_MAX_PHYSICAL_ID))
+	if ((!x2avic_enabled && index > AVIC_MAX_PHYSICAL_ID) ||
+	    (index > X2AVIC_MAX_PHYSICAL_ID))
 		return NULL;
 
 	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
@@ -250,47 +255,14 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 	return &avic_physical_id_table[index];
 }
 
-/*
- * Note:
- * AVIC hardware walks the nested page table to check permissions,
- * but does not use the SPA address specified in the leaf page
- * table entry since it uses address in the AVIC_BACKING_PAGE pointer
- * field of the VMCB. Therefore, we set up the
- * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
- */
-static int avic_alloc_access_page(struct kvm *kvm)
-{
-	void __user *ret;
-	int r = 0;
-
-	mutex_lock(&kvm->slots_lock);
-
-	if (kvm->arch.apic_access_memslot_enabled)
-		goto out;
-
-	ret = __x86_set_memory_region(kvm,
-				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-				      APIC_DEFAULT_PHYS_BASE,
-				      PAGE_SIZE);
-	if (IS_ERR(ret)) {
-		r = PTR_ERR(ret);
-		goto out;
-	}
-
-	kvm->arch.apic_access_memslot_enabled = true;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 {
 	u64 *entry, new_entry;
 	int id = vcpu->vcpu_id;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if ((avic_mode == AVIC_MODE_X1 && id > AVIC_MAX_PHYSICAL_ID) ||
-	    (avic_mode == AVIC_MODE_X2 && id > X2AVIC_MAX_PHYSICAL_ID))
+	if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
+	    (id > X2AVIC_MAX_PHYSICAL_ID))
 		return -EINVAL;
 
 	if (!vcpu->arch.apic->regs)
@@ -299,7 +271,13 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	if (kvm_apicv_activated(vcpu->kvm)) {
 		int ret;
 
-		ret = avic_alloc_access_page(vcpu->kvm);
+		/*
+		 * Note, AVIC hardware walks the nested page table to check
+		 * permissions, but does not use the SPA address specified in
+		 * the leaf SPTE since it uses address in the AVIC_BACKING_PAGE
+		 * pointer field of the VMCB.
+		 */
+		ret = kvm_alloc_apic_access_page(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
@@ -339,6 +317,60 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
 	put_cpu();
 }
 
+
+static void avic_kick_vcpu(struct kvm_vcpu *vcpu, u32 icrl)
+{
+	vcpu->arch.apic->irr_pending = true;
+	svm_complete_interrupt_delivery(vcpu,
+					icrl & APIC_MODE_MASK,
+					icrl & APIC_INT_LEVELTRIG,
+					icrl & APIC_VECTOR_MASK);
+}
+
+static void avic_kick_vcpu_by_physical_id(struct kvm *kvm, u32 physical_id,
+					  u32 icrl)
+{
+	/*
+	 * KVM inhibits AVIC if any vCPU ID diverges from the vCPU's APIC ID,
+	 * i.e. APIC ID == vCPU ID.
+	 */
+	struct kvm_vcpu *target_vcpu = kvm_get_vcpu_by_id(kvm, physical_id);
+
+	/* Once again, nothing to do if the target vCPU doesn't exist. */
+	if (unlikely(!target_vcpu))
+		return;
+
+	avic_kick_vcpu(target_vcpu, icrl);
+}
+
+static void avic_kick_vcpu_by_logical_id(struct kvm *kvm, u32 *avic_logical_id_table,
+					 u32 logid_index, u32 icrl)
+{
+	u32 physical_id;
+
+	if (avic_logical_id_table) {
+		u32 logid_entry = avic_logical_id_table[logid_index];
+
+		/* Nothing to do if the logical destination is invalid. */
+		if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
+			return;
+
+		physical_id = logid_entry &
+			      AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+	} else {
+		/*
+		 * For x2APIC, the logical APIC ID is a read-only value that is
+		 * derived from the x2APIC ID, thus the x2APIC ID can be found
+		 * by reversing the calculation (stored in logid_index). Note,
+		 * bits 31:20 of the x2APIC ID aren't propagated to the logical
+		 * ID, but KVM limits the x2APIC ID to KVM_MAX_VCPU_IDS.
+		 */
+		physical_id = logid_index;
+	}
+
+	avic_kick_vcpu_by_physical_id(kvm, physical_id, icrl);
+}
+
 /*
  * A fast-path version of avic_kick_target_vcpus(), which attempts to match
  * destination APIC ID to vCPU without looping through all vCPUs.
@@ -346,11 +378,10 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
 static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
 				       u32 icrl, u32 icrh, u32 index)
 {
-	u32 l1_physical_id, dest;
-	struct kvm_vcpu *target_vcpu;
 	int dest_mode = icrl & APIC_DEST_MASK;
 	int shorthand = icrl & APIC_SHORT_MASK;
 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+	u32 dest;
 
 	if (shorthand != APIC_DEST_NOSHORT)
 		return -EINVAL;
@@ -367,18 +398,18 @@ static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source
 		if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
 			return -EINVAL;
 
-		l1_physical_id = dest;
-
-		if (WARN_ON_ONCE(l1_physical_id != index))
+		if (WARN_ON_ONCE(dest != index))
 			return -EINVAL;
 
+		avic_kick_vcpu_by_physical_id(kvm, dest, icrl);
 	} else {
-		u32 bitmap, cluster;
-		int logid_index;
+		u32 *avic_logical_id_table;
+		unsigned long bitmap, i;
+		u32 cluster;
 
 		if (apic_x2apic_mode(source)) {
 			/* 16 bit dest mask, 16 bit cluster id */
-			bitmap = dest & 0xFFFF0000;
+			bitmap = dest & 0xFFFF;
 			cluster = (dest >> 16) << 4;
 		} else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
 			/* 8 bit dest mask*/
@@ -390,67 +421,32 @@ static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source
 			cluster = (dest >> 4) << 2;
 		}
 
+		/* Nothing to do if there are no destinations in the cluster. */
 		if (unlikely(!bitmap))
-			/* guest bug: nobody to send the logical interrupt to */
 			return 0;
 
-		if (!is_power_of_2(bitmap))
-			/* multiple logical destinations, use slow path */
-			return -EINVAL;
-
-		logid_index = cluster + __ffs(bitmap);
-
-		if (!apic_x2apic_mode(source)) {
-			u32 *avic_logical_id_table =
-				page_address(kvm_svm->avic_logical_id_table_page);
-
-			u32 logid_entry = avic_logical_id_table[logid_index];
-
-			if (WARN_ON_ONCE(index != logid_index))
-				return -EINVAL;
-
-			/* guest bug: non existing/reserved logical destination */
-			if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
-				return 0;
-
-			l1_physical_id = logid_entry &
-					 AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
-		} else {
-			/*
-			 * For x2APIC logical mode, cannot leverage the index.
-			 * Instead, calculate physical ID from logical ID in ICRH.
-			 */
-			int cluster = (icrh & 0xffff0000) >> 16;
-			int apic = ffs(icrh & 0xffff) - 1;
-
-			/*
-			 * If the x2APIC logical ID sub-field (i.e. icrh[15:0])
-			 * contains anything but a single bit, we cannot use the
-			 * fast path, because it is limited to a single vCPU.
-			 */
-			if (apic < 0 || icrh != (1 << apic))
-				return -EINVAL;
+		if (apic_x2apic_mode(source))
+			avic_logical_id_table = NULL;
+		else
+			avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
 
-			l1_physical_id = (cluster << 4) + apic;
-		}
+		/*
+		 * AVIC is inhibited if vCPUs aren't mapped 1:1 with logical
+		 * IDs, thus each bit in the destination is guaranteed to map
+		 * to at most one vCPU.
+		 */
+		for_each_set_bit(i, &bitmap, 16)
+			avic_kick_vcpu_by_logical_id(kvm, avic_logical_id_table,
						     cluster + i, icrl);
 	}
 
-	target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
-	if (unlikely(!target_vcpu))
-		/* guest bug: non existing vCPU is a target of this IPI*/
-		return 0;
-
-	target_vcpu->arch.apic->irr_pending = true;
-	svm_complete_interrupt_delivery(target_vcpu,
-					icrl & APIC_MODE_MASK,
-					icrl & APIC_INT_LEVELTRIG,
-					icrl & APIC_VECTOR_MASK);
 	return 0;
 }
 
 static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
 				   u32 icrl, u32 icrh, u32 index)
 {
+	u32 dest = apic_x2apic_mode(source) ?
+		   icrh : GET_XAPIC_DEST_FIELD(icrh);
 	unsigned long i;
 	struct kvm_vcpu *vcpu;
 
@@ -466,21 +462,9 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
	 * since entered the guest will have processed pending IRQs at VMRUN.
	 */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		u32 dest;
-
-		if (apic_x2apic_mode(vcpu->arch.apic))
-			dest = icrh;
-		else
-			dest = GET_XAPIC_DEST_FIELD(icrh);
-
 		if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
-					dest, icrl & APIC_DEST_MASK)) {
-			vcpu->arch.apic->irr_pending = true;
-			svm_complete_interrupt_delivery(vcpu,
-							icrl & APIC_MODE_MASK,
-							icrl & APIC_INT_LEVELTRIG,
-							icrl & APIC_VECTOR_MASK);
-		}
+					dest, icrl & APIC_DEST_MASK))
+			avic_kick_vcpu(vcpu, icrl);
 	}
 }
 
@@ -496,14 +480,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
 	trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
 
 	switch (id) {
+	case AVIC_IPI_FAILURE_INVALID_TARGET:
 	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
 		/*
		 * Emulate IPIs that are not handled by AVIC hardware, which
-		 * only virtualizes Fixed, Edge-Triggered INTRs. The exit is
-		 * a trap, e.g. ICR holds the correct value and RIP has been
-		 * advanced, KVM is responsible only for emulating the IPI.
-		 * Sadly, hardware may sometimes leave the BUSY flag set, in
-		 * which case KVM needs to emulate the ICR write as well in
+		 * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+		 * if _any_ targets are invalid, e.g. if the logical mode mask
+		 * is a superset of running vCPUs.
+		 *
+		 * The exit is a trap, e.g. ICR holds the correct value and RIP
+		 * has been advanced, KVM is responsible only for emulating the
+		 * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
+		 * in which case KVM needs to emulate the ICR write as well in
		 * order to clear the BUSY flag.
		 */
 		if (icrl & APIC_ICR_BUSY)
@@ -519,8 +507,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
		 */
 		avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
 		break;
-	case AVIC_IPI_FAILURE_INVALID_TARGET:
-		break;
 	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
 		WARN_ONCE(1, "Invalid backing page\n");
 		break;
@@ -541,33 +527,33 @@ unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
 static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 {
 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
-	int index;
 	u32 *logical_apic_id_table;
-	int dlid = GET_APIC_LOGICAL_ID(ldr);
-
-	if (!dlid)
-		return NULL;
+	u32 cluster, index;
 
-	if (flat) {	/* flat */
-		index = ffs(dlid) - 1;
-		if (index > 7)
-			return NULL;
-	} else {	/* cluster */
-		int cluster = (dlid & 0xf0) >> 4;
-		int apic = ffs(dlid & 0x0f) - 1;
+	ldr = GET_APIC_LOGICAL_ID(ldr);
 
-		if ((apic < 0) || (apic > 7) ||
-		    (cluster >= 0xf))
+	if (flat) {
+		cluster = 0;
+	} else {
+		cluster = (ldr >> 4);
+		if (cluster >= 0xf)
 			return NULL;
-		index = (cluster << 2) + apic;
+		ldr &= 0xf;
 	}
+	if (!ldr || !is_power_of_2(ldr))
+		return NULL;
+
+	index = __ffs(ldr);
+	if (WARN_ON_ONCE(index > 7))
+		return NULL;
+	index += (cluster << 2);
 
 	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
 
 	return &logical_apic_id_table[index];
 }
 
-static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
+static void avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
 {
 	bool flat;
 	u32 *entry, new_entry;
@@ -575,15 +561,13 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
 	flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
 	entry = avic_get_logical_id_entry(vcpu, ldr, flat);
 	if (!entry)
-		return -EINVAL;
+		return;
 
 	new_entry = READ_ONCE(*entry);
 	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
 	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
 	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
 	WRITE_ONCE(*entry, new_entry);
-
-	return 0;
 }
 
 static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
@@ -601,29 +585,23 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
 	clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
 }
 
-static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
+static void avic_handle_ldr_update(struct kvm_vcpu *vcpu)
 {
-	int ret = 0;
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
 	u32 id = kvm_xapic_id(vcpu->arch.apic);
 
 	/* AVIC does not support LDR update for x2APIC */
 	if (apic_x2apic_mode(vcpu->arch.apic))
-		return 0;
+		return;
 
 	if (ldr == svm->ldr_reg)
-		return 0;
+		return;
 
 	avic_invalidate_logical_id_entry(vcpu);
 
-	if (ldr)
-		ret = avic_ldr_write(vcpu, id, ldr);
-
-	if (!ret)
-		svm->ldr_reg = ldr;
-
-	return ret;
+	svm->ldr_reg = ldr;
+	avic_ldr_write(vcpu, id, ldr);
 }
 
 static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
@@ -645,12 +623,14 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
 
 	switch (offset) {
 	case APIC_LDR:
-		if (avic_handle_ldr_update(vcpu))
-			return 0;
+		avic_handle_ldr_update(vcpu);
 		break;
 	case APIC_DFR:
 		avic_handle_dfr_update(vcpu);
 		break;
+	case APIC_RRR:
+		/* Ignore writes to Read Remote Data, it's read-only. */
+		return 1;
 	default:
 		break;
 	}
 
@@ -739,18 +719,6 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 	avic_handle_ldr_update(vcpu);
 }
 
-void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
-{
-	if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
-		return;
-
-	if (kvm_get_apic_mode(vcpu) == LAPIC_MODE_INVALID) {
-		WARN_ONCE(true, "Invalid local APIC state (vcpu_id=%d)", vcpu->vcpu_id);
-		return;
-	}
-	avic_refresh_apicv_exec_ctrl(vcpu);
-}
-
 static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
 {
 	int ret = 0;
@@ -995,23 +963,6 @@ out:
 	return ret;
 }
 
-bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
-{
-	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
-			  BIT(APICV_INHIBIT_REASON_ABSENT) |
-			  BIT(APICV_INHIBIT_REASON_HYPERV) |
-			  BIT(APICV_INHIBIT_REASON_NESTED) |
-			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
-			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
-			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
-			  BIT(APICV_INHIBIT_REASON_SEV) |
-			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
-			  BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
-
-	return supported & BIT(reason);
-}
-
-
 static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
@@ -1064,6 +1015,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		return;
 
 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
 	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
@@ -1092,17 +1044,15 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
 
-
-void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb01.ptr;
-	bool activated = kvm_vcpu_apicv_active(vcpu);
 
-	if (!enable_apicv)
+	if (!lapic_in_kernel(vcpu) || !enable_apicv)
 		return;
 
-	if (activated) {
+	if (kvm_vcpu_apicv_active(vcpu)) {
 		/**
		 * During AVIC temporary deactivation, guest could update
		 * APIC ID, DFR and LDR registers, which would not be trapped
@@ -1116,6 +1066,16 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 		avic_deactivate_vmcb(svm);
 	}
 	vmcb_mark_dirty(vmcb, VMCB_AVIC);
+}
+
+void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+{
+	bool activated = kvm_vcpu_apicv_active(vcpu);
+
+	if (!enable_apicv)
+		return;
+
+	avic_refresh_virtual_apic_mode(vcpu);
 
 	if (activated)
 		avic_vcpu_load(vcpu, vcpu->cpu);
@@ -1165,32 +1125,32 @@ bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
 	if (!npt_enabled)
 		return false;
 
+	/* AVIC is a prerequisite for x2AVIC. */
+	if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) {
+		if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
+			pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
+			pr_warn(FW_BUG "Try enable AVIC using force_avic option");
+		}
+		return false;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_AVIC)) {
-		avic_mode = AVIC_MODE_X1;
 		pr_info("AVIC enabled\n");
 	} else if (force_avic) {
 		/*
		 * Some older systems does not advertise AVIC support.
		 * See Revision Guide for specific AMD processor for more detail.
		 */
-		avic_mode = AVIC_MODE_X1;
 		pr_warn("AVIC is not supported in CPUID but force enabled");
 		pr_warn("Your system might crash and burn");
 	}
 
-	/* AVIC is a prerequisite for x2AVIC. */
-	if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
-		if (avic_mode == AVIC_MODE_X1) {
-			avic_mode = AVIC_MODE_X2;
-			pr_info("x2AVIC enabled\n");
-		} else {
-			pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
-			pr_warn(FW_BUG "Try enable AVIC using force_avic option");
-		}
-	}
+	x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
+	if (x2avic_enabled)
+		pr_info("x2AVIC enabled\n");
 
-	if (avic_mode != AVIC_MODE_NONE)
-		amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+	amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
 
-	return !!avic_mode;
+	return true;
 }
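
For reference, the x2APIC branch of the new avic_kick_vcpu_by_logical_id() relies on the architecture deriving the logical ID from the physical ID: logical ID = ((id >> 4) << 16) | (1 << (id & 0xf)). Splitting ICRH into a 16-bit member bitmap and a cluster base therefore recovers each member's x2APIC ID as "cluster base + bit index", which is why the mask fix (0xFFFF0000 -> 0xFFFF) and the per-bit loop suffice. Below is a minimal userspace sketch of that round trip; it is plain C rather than kernel code, with __builtin_ctz standing in for the kernel's __ffs():

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* x2APIC: cluster (id >> 4) in bits 31:16, member bit (1 << (id & 0xf)) in bits 15:0. */
static uint32_t x2apic_logical_id(uint32_t x2apic_id)
{
        return ((x2apic_id >> 4) << 16) | (1u << (x2apic_id & 0xf));
}

/* Mirrors the fast path: member bitmap, cluster base, physical ID = base + bit index. */
static void kick_logical_dests(uint32_t dest)
{
        uint32_t bitmap = dest & 0xFFFF;        /* the fixed mask (was 0xFFFF0000) */
        uint32_t cluster = (dest >> 16) << 4;

        for (int i = 0; i < 16; i++)
                if (bitmap & (1u << i))
                        printf("kick vCPU with x2APIC ID %u\n", cluster + i);
}

int main(void)
{
        /* Round trip: deriving the logical ID and reversing it is the identity. */
        for (uint32_t id = 0; id < 512; id++) {
                uint32_t dest = x2apic_logical_id(id);
                assert(((dest >> 16) << 4) + (uint32_t)__builtin_ctz(dest & 0xFFFF) == id);
        }
        kick_logical_dests(x2apic_logical_id(42));
        return 0;
}

The reversal only holds up to bits 31:20 of the x2APIC ID, which the logical ID cannot carry; per the patch comment, KVM sidesteps this by capping x2APIC IDs at KVM_MAX_VCPU_IDS.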
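The rewritten avic_get_logical_id_entry() likewise reduces xAPIC flat and cluster addressing to one index computation: exactly one destination bit may be set, and each cluster owns four consecutive table slots. A standalone sketch of just that index math (again userspace C under the patch's checks, returning -1 where the kernel returns NULL):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model of the new indexing: logical ID table slot for an xAPIC LDR, or -1. */
static int logical_table_index(uint32_t ldr, bool flat)
{
        uint32_t cluster, index;

        ldr = (ldr >> 24) & 0xff;               /* GET_APIC_LOGICAL_ID() */

        if (flat) {
                cluster = 0;
        } else {
                cluster = ldr >> 4;
                if (cluster >= 0xf)             /* 0xf addresses all clusters */
                        return -1;
                ldr &= 0xf;
        }
        if (!ldr || (ldr & (ldr - 1)))          /* zero or multiple bits set */
                return -1;

        index = (uint32_t)__builtin_ctz(ldr);   /* __ffs() */
        if (index > 7)                          /* mirrors the WARN_ON_ONCE() */
                return -1;
        return (int)(index + (cluster << 2));
}

int main(void)
{
        /* Cluster 2, member bit 1 => slot (2 << 2) + 1 = 9. */
        printf("%d\n", logical_table_index(0x22u << 24, false));
        /* Flat LDR with two bits set has no single table entry. */
        printf("%d\n", logical_table_index(0x05u << 24, true));
        return 0;
}

Returning -1 (NULL in the kernel) for multi-bit LDRs pairs with avic_ldr_write() becoming void: an unrepresentable LDR simply leaves no valid table entry, and the invalid-target IPI exit now falls through to avic_kick_target_vcpus() instead of being dropped.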