aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm/lapic.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r--arch/x86/kvm/lapic.c139
1 files changed, 92 insertions, 47 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a7172ba59ad2..95c6beb8ce27 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
* reversing the LDR calculation to get cluster of APICs, i.e. no
* additional work is required.
*/
- if (apic_x2apic_mode(apic)) {
- WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+ if (apic_x2apic_mode(apic))
return;
- }
if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
&cluster, &mask))) {
@@ -1743,7 +1741,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
s64 min_period = min_timer_period_us * 1000LL;
if (apic->lapic_timer.period < min_period) {
- pr_info_ratelimited(
+ pr_info_once(
"vcpu %i: requested %lld ns "
"lapic timer period limited to %lld ns\n",
apic->vcpu->vcpu_id,
@@ -1946,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
u64 ns = 0;
ktime_t expire;
struct kvm_vcpu *vcpu = apic->vcpu;
- unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+ u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
unsigned long flags;
ktime_t now;
@@ -2455,6 +2453,43 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
+#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
+
+int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+{
+ if (data & X2APIC_ICR_RESERVED_BITS)
+ return 1;
+
+ /*
+ * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but
+ * only AMD requires it to be zero, Intel essentially just ignores the
+ * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled,
+ * the CPU performs the reserved bits checks, i.e. the underlying CPU
+ * behavior will "win". Arbitrarily clear the BUSY bit, as there is no
+ * sane way to provide consistent behavior with respect to hardware.
+ */
+ data &= ~APIC_ICR_BUSY;
+
+ kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+ if (kvm_x86_ops.x2apic_icr_is_split) {
+ kvm_lapic_set_reg(apic, APIC_ICR, data);
+ kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
+ } else {
+ kvm_lapic_set_reg64(apic, APIC_ICR, data);
+ }
+ trace_kvm_apic_write(APIC_ICR, data);
+ return 0;
+}
+
+static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
+{
+ if (kvm_x86_ops.x2apic_icr_is_split)
+ return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
+ (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
+
+ return kvm_lapic_get_reg64(apic, APIC_ICR);
+}
+
/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
@@ -2472,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
* maybe-unecessary write, and both are in the noise anyways.
*/
if (apic_x2apic_mode(apic) && offset == APIC_ICR)
- kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR));
+ WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
else
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
}
@@ -2594,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (apic->apicv_active) {
- /* irr_pending is always true when apicv is activated. */
- apic->irr_pending = true;
+ /*
+ * When APICv is enabled, KVM must always search the IRR for a pending
+ * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
+ * isn't running. If APICv is disabled, KVM _should_ search the IRR
+ * for a pending IRQ. But KVM currently doesn't ensure *all* hardware,
+ * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
+ * the IRR at this time could race with IRQ delivery from hardware that
+ * still sees APICv as being enabled.
+ *
+ * FIXME: Ensure other vCPUs and devices observe the change in APICv
+ * state prior to updating KVM's metadata caches, so that KVM
+ * can safely search the IRR and set irr_pending accordingly.
+ */
+ apic->irr_pending = true;
+
+ if (apic->apicv_active)
apic->isr_count = 1;
- } else {
- /*
- * Don't clear irr_pending, searching the IRR can race with
- * updates from the CPU as APICv is still active from hardware's
- * perspective. The flag will be cleared as appropriate when
- * KVM injects the interrupt.
- */
+ else
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
- }
+
apic->highest_isr_cache = -1;
}
@@ -2924,14 +2966,13 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
}
}
-int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
+void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector)
{
- int vector = kvm_apic_has_interrupt(vcpu);
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (vector == -1)
- return -1;
+ if (WARN_ON_ONCE(vector < 0 || !apic))
+ return;
/*
* We get here even with APIC virtualization enabled, if doing
@@ -2959,41 +3000,55 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
__apic_update_ppr(apic, &ppr);
}
- return vector;
}
+EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt);
static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s, bool set)
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
+ u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
u32 *id = (u32 *)(s->regs + APIC_ID);
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
u64 icr;
if (vcpu->kvm->arch.x2apic_format) {
- if (*id != vcpu->vcpu_id)
+ if (*id != x2apic_id)
return -EINVAL;
} else {
+ /*
+ * Ignore the userspace value when setting APIC state.
+ * KVM's model is that the x2APIC ID is readonly, e.g.
+ * KVM only supports delivering interrupts to KVM's
+ * version of the x2APIC ID. However, for backwards
+ * compatibility, don't reject attempts to set a
+ * mismatched ID for userspace that hasn't opted into
+ * x2apic_format.
+ */
if (set)
- *id >>= 24;
+ *id = x2apic_id;
else
- *id <<= 24;
+ *id = x2apic_id << 24;
}
/*
* In x2APIC mode, the LDR is fixed and based on the id. And
- * ICR is internally a single 64-bit register, but needs to be
- * split to ICR+ICR2 in userspace for backwards compatibility.
+ * if the ICR is _not_ split, ICR is internally a single 64-bit
+ * register, but needs to be split to ICR+ICR2 in userspace for
+ * backwards compatibility.
*/
- if (set) {
- *ldr = kvm_apic_calc_x2apic_ldr(*id);
-
- icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
- (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
- __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
- } else {
- icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
- __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ if (set)
+ *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
+
+ if (!kvm_x86_ops.x2apic_icr_is_split) {
+ if (set) {
+ icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+ (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+ __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+ } else {
+ icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+ __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ }
}
}
@@ -3186,22 +3241,12 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
return 0;
}
-int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
-{
- data &= ~APIC_ICR_BUSY;
-
- kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
- kvm_lapic_set_reg64(apic, APIC_ICR, data);
- trace_kvm_apic_write(APIC_ICR, data);
- return 0;
-}
-
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
{
u32 low;
if (reg == APIC_ICR) {
- *data = kvm_lapic_get_reg64(apic, APIC_ICR);
+ *data = kvm_x2apic_icr_read(apic);
return 0;
}