Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r--   arch/x86/kvm/lapic.c   139
1 file changed, 92 insertions, 47 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a7172ba59ad2..95c6beb8ce27 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
 	 * reversing the LDR calculation to get cluster of APICs, i.e. no
 	 * additional work is required.
 	 */
-	if (apic_x2apic_mode(apic)) {
-		WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+	if (apic_x2apic_mode(apic))
 		return;
-	}
 
 	if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
 							&cluster, &mask))) {
@@ -1743,7 +1741,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
 	s64 min_period = min_timer_period_us * 1000LL;
 
 	if (apic->lapic_timer.period < min_period) {
-		pr_info_ratelimited(
+		pr_info_once(
 		    "vcpu %i: requested %lld ns "
 		    "lapic timer period limited to %lld ns\n",
 		    apic->vcpu->vcpu_id,
@@ -1946,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	u64 ns = 0;
 	ktime_t expire;
 	struct kvm_vcpu *vcpu = apic->vcpu;
-	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+	u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
 	unsigned long flags;
 	ktime_t now;
 
@@ -2455,6 +2453,43 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 
+#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
+
+int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+{
+	if (data & X2APIC_ICR_RESERVED_BITS)
+		return 1;
+
+	/*
+	 * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but
+	 * only AMD requires it to be zero, Intel essentially just ignores the
+	 * bit.  And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled,
+	 * the CPU performs the reserved bits checks, i.e. the underlying CPU
+	 * behavior will "win".  Arbitrarily clear the BUSY bit, as there is no
+	 * sane way to provide consistent behavior with respect to hardware.
+	 */
+	data &= ~APIC_ICR_BUSY;
+
+	kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+	if (kvm_x86_ops.x2apic_icr_is_split) {
+		kvm_lapic_set_reg(apic, APIC_ICR, data);
+		kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
+	} else {
+		kvm_lapic_set_reg64(apic, APIC_ICR, data);
+	}
+	trace_kvm_apic_write(APIC_ICR, data);
+	return 0;
+}
+
+static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
+{
+	if (kvm_x86_ops.x2apic_icr_is_split)
+		return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
+		       (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
+
+	return kvm_lapic_get_reg64(apic, APIC_ICR);
+}
+
 /* emulate APIC access in a trap manner */
 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
 {
@@ -2472,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
 	 * maybe-unecessary write, and both are in the noise anyways.
 	 */
 	if (apic_x2apic_mode(apic) && offset == APIC_ICR)
-		kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR));
+		WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
 	else
 		kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
 }
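The new ICR handling above makes three separable decisions: a write with any of reserved bits 31:20, 17:16, or 13 set fails, BUSY (bit 12) is unconditionally cleared, and the value is stored either as one 64-bit register or as an ICR+ICR2 split depending on kvm_x86_ops.x2apic_icr_is_split. The following minimal userspace sketch is illustrative only, not part of the patch: regs[] stands in for the vAPIC backing page, and the helpers mirror (not reuse) the kernel's logic; it assumes a little-endian layout, as on x86.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local re-implementations of the kernel's bit helpers. */
#define BIT(n)            (1ULL << (n))
#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h))) & ~(BIT(l) - 1))

#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
#define APIC_ICR_BUSY BIT(12)	/* delivery status, bit 12 */

/* Architectural register offsets; ICR2 is the next 16-byte slot. */
#define APIC_ICR  0x300
#define APIC_ICR2 0x310

static uint8_t regs[0x400];	/* stand-in for the vAPIC backing page */

/* Mirrors the patch: reject reserved bits, clear BUSY, then store. */
static int x2apic_icr_write(uint64_t data, int icr_is_split)
{
	if (data & X2APIC_ICR_RESERVED_BITS)
		return 1;	/* non-zero -> the guest gets #GP */

	data &= ~APIC_ICR_BUSY;	/* BUSY is unconditionally cleared */

	if (icr_is_split) {	/* two 32-bit registers, as on AMD/x2AVIC */
		uint32_t lo = (uint32_t)data, hi = (uint32_t)(data >> 32);
		memcpy(regs + APIC_ICR, &lo, 4);
		memcpy(regs + APIC_ICR2, &hi, 4);
	} else {		/* single 64-bit store at APIC_ICR */
		memcpy(regs + APIC_ICR, &data, 8);
	}
	return 0;
}

static uint64_t x2apic_icr_read(int icr_is_split)
{
	uint64_t data = 0;
	uint32_t lo, hi;

	if (icr_is_split) {
		memcpy(&lo, regs + APIC_ICR, 4);
		memcpy(&hi, regs + APIC_ICR2, 4);
		return (uint64_t)lo | (uint64_t)hi << 32;
	}
	memcpy(&data, regs + APIC_ICR, 8);
	return data;
}

int main(void)
{
	/* vector 0xfe, destination 3 in the high half, BUSY set by the guest */
	uint64_t icr = (3ULL << 32) | APIC_ICR_BUSY | 0xfe;

	printf("write: %d\n", x2apic_icr_write(icr, 1));	/* 0: accepted */
	printf("read : %#llx\n", (unsigned long long)x2apic_icr_read(1));
	printf("rsvd : %d\n", x2apic_icr_write(icr | BIT(13), 1));	/* 1: #GP */
	return 0;
}

Keeping the high half in ICR2 for the split layout is what lets existing userspace, which saves and restores ICR and ICR2 as two 32-bit registers, keep working unchanged.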
@@ -2594,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic->apicv_active) {
-		/* irr_pending is always true when apicv is activated. */
-		apic->irr_pending = true;
+	/*
+	 * When APICv is enabled, KVM must always search the IRR for a pending
+	 * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
+	 * isn't running.  If APICv is disabled, KVM _should_ search the IRR
+	 * for a pending IRQ.  But KVM currently doesn't ensure *all* hardware,
+	 * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
+	 * the IRR at this time could race with IRQ delivery from hardware that
+	 * still sees APICv as being enabled.
+	 *
+	 * FIXME: Ensure other vCPUs and devices observe the change in APICv
+	 *        state prior to updating KVM's metadata caches, so that KVM
+	 *        can safely search the IRR and set irr_pending accordingly.
+	 */
+	apic->irr_pending = true;
+
+	if (apic->apicv_active)
 		apic->isr_count = 1;
-	} else {
-		/*
-		 * Don't clear irr_pending, searching the IRR can race with
-		 * updates from the CPU as APICv is still active from hardware's
-		 * perspective.  The flag will be cleared as appropriate when
-		 * KVM injects the interrupt.
-		 */
+	else
 		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
-	}
+
 	apic->highest_isr_cache = -1;
 }
 
@@ -2924,14 +2966,13 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 	}
 }
 
-int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
+void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector)
 {
-	int vector = kvm_apic_has_interrupt(vcpu);
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 ppr;
 
-	if (vector == -1)
-		return -1;
+	if (WARN_ON_ONCE(vector < 0 || !apic))
+		return;
 
 	/*
 	 * We get here even with APIC virtualization enabled, if doing
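When APICv is disabled, isr_count above is recomputed with count_vectors() over the in-service register. For reference, the (v)APIC exposes each 256-bit register bank (IRR, ISR, TMR) as eight 32-bit words spaced 16 bytes apart. The standalone sketch below is illustrative only: regs[] stands in for the vAPIC page, and popcount uses the GCC/Clang builtin rather than the kernel's hweight32().

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define APIC_ISR             0x100	/* first ISR word's register offset */
#define APIC_VECTORS_PER_REG 32

static uint8_t regs[0x400];	/* stand-in for the vAPIC backing page */

/* Mirrors KVM's count_vectors(): sum the set bits across the eight words. */
static int count_vectors(const uint8_t *bitmap)
{
	int count = 0;

	for (int vec = 0; vec < 256; vec += APIC_VECTORS_PER_REG) {
		uint32_t word;
		/* each 32-vector chunk lives in its own 16-byte slot */
		memcpy(&word, bitmap + (vec / 32) * 0x10, 4);
		count += __builtin_popcount(word);
	}
	return count;
}

int main(void)
{
	/* mark vectors 0x31 and 0xfe as in-service */
	regs[APIC_ISR + (0x31 / 32) * 0x10 + (0x31 % 32) / 8] |= 1 << (0x31 % 8);
	regs[APIC_ISR + (0xfe / 32) * 0x10 + (0xfe % 32) / 8] |= 1 << (0xfe % 8);

	printf("isr_count = %d\n", count_vectors(regs + APIC_ISR));	/* 2 */
	return 0;
}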
@@ -2959,41 +3000,55 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 		__apic_update_ppr(apic, &ppr);
 	}
 
-	return vector;
 }
+EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt);
 
 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
 				struct kvm_lapic_state *s, bool set)
 {
 	if (apic_x2apic_mode(vcpu->arch.apic)) {
+		u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
 		u32 *id = (u32 *)(s->regs + APIC_ID);
 		u32 *ldr = (u32 *)(s->regs + APIC_LDR);
 		u64 icr;
 
 		if (vcpu->kvm->arch.x2apic_format) {
-			if (*id != vcpu->vcpu_id)
+			if (*id != x2apic_id)
 				return -EINVAL;
 		} else {
+			/*
+			 * Ignore the userspace value when setting APIC state.
+			 * KVM's model is that the x2APIC ID is readonly, e.g.
+			 * KVM only supports delivering interrupts to KVM's
+			 * version of the x2APIC ID.  However, for backwards
+			 * compatibility, don't reject attempts to set a
+			 * mismatched ID for userspace that hasn't opted into
+			 * x2apic_format.
+			 */
 			if (set)
-				*id >>= 24;
+				*id = x2apic_id;
 			else
-				*id <<= 24;
+				*id = x2apic_id << 24;
 		}
 
 		/*
 		 * In x2APIC mode, the LDR is fixed and based on the id.  And
-		 * ICR is internally a single 64-bit register, but needs to be
-		 * split to ICR+ICR2 in userspace for backwards compatibility.
+		 * if the ICR is _not_ split, ICR is internally a single 64-bit
+		 * register, but needs to be split to ICR+ICR2 in userspace for
+		 * backwards compatibility.
 		 */
-		if (set) {
-			*ldr = kvm_apic_calc_x2apic_ldr(*id);
-
-			icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
-			      (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
-			__kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
-		} else {
-			icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
-			__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+		if (set)
+			*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
+
+		if (!kvm_x86_ops.x2apic_icr_is_split) {
+			if (set) {
+				icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+				      (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+				__kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+			} else {
+				icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+				__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+			}
 		}
 	}
 
@@ -3186,22 +3241,12 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
 	return 0;
 }
 
-int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
-{
-	data &= ~APIC_ICR_BUSY;
-
-	kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
-	kvm_lapic_set_reg64(apic, APIC_ICR, data);
-	trace_kvm_apic_write(APIC_ICR, data);
-	return 0;
-}
-
 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
 {
 	u32 low;
 
 	if (reg == APIC_ICR) {
-		*data = kvm_lapic_get_reg64(apic, APIC_ICR);
+		*data = kvm_x2apic_icr_read(apic);
 		return 0;
 	}
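The state fixup relies on two fixed derivations: in the xAPIC-compatible format the 8-bit APIC ID lives in bits 31:24 of the ID register (hence x2apic_id << 24), and the x2APIC LDR is derived from the ID as ((id >> 4) << 16) | (1 << (id & 0xf)), which is what kvm_apic_calc_x2apic_ldr() computes (cluster in bits 31:16, per-CPU bit in 15:0). A small standalone demonstration, illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Same derivation as KVM's kvm_apic_calc_x2apic_ldr(). */
static uint32_t calc_x2apic_ldr(uint32_t id)
{
	return ((id >> 4) << 16) | (1u << (id & 0xf));
}

int main(void)
{
	for (uint32_t id = 0; id < 40; id += 13) {
		/* In xAPIC compat (!x2apic_format), the 8-bit APIC ID sits
		 * in bits 31:24 of the ID register, hence the "id << 24". */
		printf("id %2u: APIC_ID reg %#010x, LDR %#010x\n",
		       id, id << 24, calc_x2apic_ldr(id));
	}
	return 0;
}

Because the LDR is now always recomputed from KVM's own (readonly) x2APIC ID rather than the userspace-provided *id, a stale or mismatched LDR in the saved state is harmless on restore.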