aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arm64/silicon-errata.txt1
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--arch/arm/kvm/mmu.c2
-rw-r--r--arch/arm64/kvm/hyp/switch.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c10
-rw-r--r--arch/mips/kvm/mmu.c2
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/mmu.c12
-rw-r--r--arch/x86/kvm/svm.c406
-rw-r--r--arch/x86/kvm/vmx.c172
-rw-r--r--arch/x86/kvm/x86.c1
-rw-r--r--drivers/iommu/amd_iommu.c484
-rw-r--r--drivers/iommu/amd_iommu_init.c181
-rw-r--r--drivers/iommu/amd_iommu_proto.h1
-rw-r--r--drivers/iommu/amd_iommu_types.h149
-rw-r--r--include/linux/amd-iommu.h43
-rw-r--r--include/linux/irqchip/arm-gic-v3.h1
-rw-r--r--virt/kvm/arm/arch_timer.c22
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c158
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c26
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c8
-rw-r--r--virt/kvm/arm/vgic/vgic.c10
-rw-r--r--virt/kvm/arm/vgic/vgic.h6
-rw-r--r--virt/kvm/eventfd.c22
-rw-r--r--virt/kvm/kvm_main.c6
27 files changed, 1462 insertions, 282 deletions
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 4da60b463995..ccc60324e738 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -53,6 +53,7 @@ stable kernels.
| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
| ARM | Cortex-A57 | #852523 | N/A |
| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
+| ARM | Cortex-A72 | #853709 | N/A |
| ARM | MMU-500 | #841119,#826419 | N/A |
| | | | |
| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 46c030a49186..748ef7bdd64d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -460,6 +460,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
driver will print ACPI tables for AMD IOMMU during
IOMMU initialization.
+ amd_iommu_intr= [HW,X86-64]
+ Specifies one of the following AMD IOMMU interrupt
+ remapping modes:
+ legacy - Use legacy interrupt remapping mode.
+ vapic - Use virtual APIC mode, which allows IOMMU
+ to inject interrupts directly into guest.
+ This mode requires kvm-amd.avic=1.
+ (Default when IOMMU HW support is present.)
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index bda27b6b1aa2..29d0b23af2a9 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1309,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
smp_rmb();
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
- if (is_error_pfn(pfn))
+ if (is_error_noslot_pfn(pfn))
return -EFAULT;
if (kvm_is_device_pfn(pfn)) {
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ae7855f16ec2..5a84b4562603 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -256,7 +256,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
/*
* We must restore the 32-bit state before the sysregs, thanks
- * to Cortex-A57 erratum #852523.
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_guest_state(guest_ctxt);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b0b225ceca18..e51367d159d0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -823,14 +823,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters. Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- *
* Debug handling: We do trap most, if not all debug related system
* registers. The implementation is good enough to ensure that a guest
* can use these with minimal performance degradation. The drawback is
@@ -1360,7 +1352,7 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
/* ICC_SRE */
- { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 6cfdcf55572d..121008c0fcc9 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -40,7 +40,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
srcu_idx = srcu_read_lock(&kvm->srcu);
pfn = gfn_to_pfn(kvm, gfn);
- if (is_error_pfn(pfn)) {
+ if (is_error_noslot_pfn(pfn)) {
kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
err = -EFAULT;
goto out;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33ae3a4d0159..4c738c206be3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -781,9 +781,11 @@ struct kvm_arch {
bool disabled_lapic_found;
/* Struct members for AVIC */
+ u32 avic_vm_id;
u32 ldr_mode;
struct page *avic_logical_id_table_page;
struct page *avic_physical_id_table_page;
+ struct hlist_node hnode;
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3235e0fe7792..afa7bbb596cd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -366,7 +366,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
- F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
+ F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+ F(AVX512BW) | F(AVX512VL);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b62c85229711..23b99f305382 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1761,9 +1761,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if (value & MSR_IA32_APICBASE_ENABLE) {
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
static_key_slow_dec_deferred(&apic_hw_disabled);
- } else
+ } else {
static_key_slow_inc(&apic_hw_disabled.key);
- recalculate_apic_map(vcpu->kvm);
+ recalculate_apic_map(vcpu->kvm);
+ }
}
if ((old_value ^ value) & X2APIC_ENABLE) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3d4cc8cc56a3..d9c7e986b4e4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1207,7 +1207,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
*
* Return true if tlb need be flushed.
*/
-static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
+static bool spte_write_protect(u64 *sptep, bool pt_protect)
{
u64 spte = *sptep;
@@ -1233,12 +1233,12 @@ static bool __rmap_write_protect(struct kvm *kvm,
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_write_protect(kvm, sptep, pt_protect);
+ flush |= spte_write_protect(sptep, pt_protect);
return flush;
}
-static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
+static bool spte_clear_dirty(u64 *sptep)
{
u64 spte = *sptep;
@@ -1256,12 +1256,12 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_clear_dirty(kvm, sptep);
+ flush |= spte_clear_dirty(sptep);
return flush;
}
-static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
+static bool spte_set_dirty(u64 *sptep)
{
u64 spte = *sptep;
@@ -1279,7 +1279,7 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_set_dirty(kvm, sptep);
+ flush |= spte_set_dirty(sptep);
return flush;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af523d84d102..db77c1ca9e76 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -34,6 +34,8 @@
#include <linux/sched.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
+#include <linux/amd-iommu.h>
+#include <linux/hashtable.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -41,6 +43,7 @@
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
+#include <asm/irq_remapping.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -96,6 +99,19 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+/* AVIC GATAG is encoded using VM and VCPU IDs */
+#define AVIC_VCPU_ID_BITS 8
+#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
+
+#define AVIC_VM_ID_BITS 24
+#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
+#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
+
+#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
+ (y & AVIC_VCPU_ID_MASK))
+#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
+
static bool erratum_383_found __read_mostly;
static const u32 host_save_user_msrs[] = {
@@ -185,6 +201,23 @@ struct vcpu_svm {
struct page *avic_backing_page;
u64 *avic_physical_id_cache;
bool avic_is_running;
+
+ /*
+ * Per-vcpu list of struct amd_svm_iommu_ir:
+ * This is used mainly to store interrupt remapping information used
+ * when update the vcpu affinity. This avoids the need to scan for
+ * IRTE and try to match ga_tag in the IOMMU driver.
+ */
+ struct list_head ir_list;
+ spinlock_t ir_list_lock;
+};
+
+/*
+ * This is a wrapper of struct amd_iommu_ir_data.
+ */
+struct amd_svm_iommu_ir {
+ struct list_head node; /* Used by SVM for per-vcpu ir_list */
+ void *data; /* Storing pointer to struct amd_ir_data */
};
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
@@ -242,6 +275,10 @@ static int avic;
module_param(avic, int, S_IRUGO);
#endif
+/* AVIC VM ID bit masks and lock */
+static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
+static DEFINE_SPINLOCK(avic_vm_id_lock);
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -928,6 +965,55 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
+/* Note:
+ * This hash table is used to map VM_ID to a struct kvm_arch,
+ * when handling AMD IOMMU GALOG notification to schedule in
+ * a particular vCPU.
+ */
+#define SVM_VM_DATA_HASH_BITS 8
+DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
+static spinlock_t svm_vm_data_hash_lock;
+
+/* Note:
+ * This function is called from IOMMU driver to notify
+ * SVM to schedule in a particular vCPU of a particular VM.
+ */
+static int avic_ga_log_notifier(u32 ga_tag)
+{
+ unsigned long flags;
+ struct kvm_arch *ka = NULL;
+ struct kvm_vcpu *vcpu = NULL;
+ u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
+ u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
+
+ pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
+
+ spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+ hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
+ struct kvm *kvm = container_of(ka, struct kvm, arch);
+ struct kvm_arch *vm_data = &kvm->arch;
+
+ if (vm_data->avic_vm_id != vm_id)
+ continue;
+ vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+ break;
+ }
+ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
+
+ if (!vcpu)
+ return 0;
+
+ /* Note:
+ * At this point, the IOMMU should have already set the pending
+ * bit in the vAPIC backing page. So, we just need to schedule
+ * in the vcpu.
+ */
+ if (vcpu->mode == OUTSIDE_GUEST_MODE)
+ kvm_vcpu_wake_up(vcpu);
+
+ return 0;
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -986,10 +1072,15 @@ static __init int svm_hardware_setup(void)
if (avic) {
if (!npt_enabled ||
!boot_cpu_has(X86_FEATURE_AVIC) ||
- !IS_ENABLED(CONFIG_X86_LOCAL_APIC))
+ !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
avic = false;
- else
+ } else {
pr_info("AVIC enabled\n");
+
+ hash_init(svm_vm_data_hash);
+ spin_lock_init(&svm_vm_data_hash_lock);
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+ }
}
return 0;
@@ -1280,18 +1371,54 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
+static inline int avic_get_next_vm_id(void)
+{
+ int id;
+
+ spin_lock(&avic_vm_id_lock);
+
+ /* AVIC VM ID is one-based. */
+ id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
+ if (id <= AVIC_VM_ID_MASK)
+ __set_bit(id, avic_vm_id_bitmap);
+ else
+ id = -EAGAIN;
+
+ spin_unlock(&avic_vm_id_lock);
+ return id;
+}
+
+static inline int avic_free_vm_id(int id)
+{
+ if (id <= 0 || id > AVIC_VM_ID_MASK)
+ return -EINVAL;
+
+ spin_lock(&avic_vm_id_lock);
+ __clear_bit(id, avic_vm_id_bitmap);
+ spin_unlock(&avic_vm_id_lock);
+ return 0;
+}
+
static void avic_vm_destroy(struct kvm *kvm)
{
+ unsigned long flags;
struct kvm_arch *vm_data = &kvm->arch;
+ avic_free_vm_id(vm_data->avic_vm_id);
+
if (vm_data->avic_logical_id_table_page)
__free_page(vm_data->avic_logical_id_table_page);
if (vm_data->avic_physical_id_table_page)
__free_page(vm_data->avic_physical_id_table_page);
+
+ spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+ hash_del(&vm_data->hnode);
+ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
static int avic_vm_init(struct kvm *kvm)
{
+ unsigned long flags;
int err = -ENOMEM;
struct kvm_arch *vm_data = &kvm->arch;
struct page *p_page;
@@ -1300,6 +1427,10 @@ static int avic_vm_init(struct kvm *kvm)
if (!avic)
return 0;
+ vm_data->avic_vm_id = avic_get_next_vm_id();
+ if (vm_data->avic_vm_id < 0)
+ return vm_data->avic_vm_id;
+
/* Allocating physical APIC ID table (4KB) */
p_page = alloc_page(GFP_KERNEL);
if (!p_page)
@@ -1316,6 +1447,10 @@ static int avic_vm_init(struct kvm *kvm)
vm_data->avic_logical_id_table_page = l_page;
clear_page(page_address(l_page));
+ spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+ hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
+ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
+
return 0;
free_avic:
@@ -1323,31 +1458,34 @@ free_avic:
return err;
}
-/**
- * This function is called during VCPU halt/unhalt.
- */
-static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+static inline int
+avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
- u64 entry;
- int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
+ int ret = 0;
+ unsigned long flags;
+ struct amd_svm_iommu_ir *ir;
struct vcpu_svm *svm = to_svm(vcpu);
- if (!kvm_vcpu_apicv_active(vcpu))
- return;
-
- svm->avic_is_running = is_run;
+ if (!kvm_arch_has_assigned_device(vcpu->kvm))
+ return 0;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
- if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
- return;
+ /*
+ * Here, we go through the per-vcpu ir_list to update all existing
+ * interrupt remapping table entry targeting this vcpu.
+ */
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
- entry = READ_ONCE(*(svm->avic_physical_id_cache));
- WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
+ if (list_empty(&svm->ir_list))
+ goto out;
- entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
- if (is_run)
- entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
- WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+ list_for_each_entry(ir, &svm->ir_list, node) {
+ ret = amd_iommu_update_ga(cpu, r, ir->data);
+ if (ret)
+ break;
+ }
+out:
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+ return ret;
}
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1374,6 +1512,8 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
+ svm->avic_is_running);
}
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1385,10 +1525,27 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
return;
entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
+/**
+ * This function is called during VCPU halt/unhalt.
+ */
+static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->avic_is_running = is_run;
+ if (is_run)
+ avic_vcpu_load(vcpu, vcpu->cpu);
+ else
+ avic_vcpu_put(vcpu);
+}
+
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1450,6 +1607,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
err = avic_init_backing_page(&svm->vcpu);
if (err)
goto free_page4;
+
+ INIT_LIST_HEAD(&svm->ir_list);
+ spin_lock_init(&svm->ir_list_lock);
}
/* We initialize this flag to true to make sure that the is_running
@@ -4246,6 +4406,209 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_vcpu_wake_up(vcpu);
}
+static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+{
+ unsigned long flags;
+ struct amd_svm_iommu_ir *cur;
+
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
+ list_for_each_entry(cur, &svm->ir_list, node) {
+ if (cur->data != pi->ir_data)
+ continue;
+ list_del(&cur->node);
+ kfree(cur);
+ break;
+ }
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+}
+
+static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct amd_svm_iommu_ir *ir;
+
+ /**
+ * In some cases, the existing irte is updaed and re-set,
+ * so we need to check here if it's already been * added
+ * to the ir_list.
+ */
+ if (pi->ir_data && (pi->prev_ga_tag != 0)) {
+ struct kvm *kvm = svm->vcpu.kvm;
+ u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
+ struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+ struct vcpu_svm *prev_svm;
+
+ if (!prev_vcpu) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ prev_svm = to_svm(prev_vcpu);
+ svm_ir_list_del(prev_svm, pi);
+ }
+
+ /**
+ * Allocating new amd_iommu_pi_data, which will get
+ * add to the per-vcpu ir_list.
+ */
+ ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
+ if (!ir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ir->data = pi->ir_data;
+
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
+ list_add(&ir->node, &svm->ir_list);
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+out:
+ return ret;
+}
+
+/**
+ * Note:
+ * The HW cannot support posting multicast/broadcast
+ * interrupts to a vCPU. So, we still use legacy interrupt
+ * remapping for these kind of interrupts.
+ *
+ * For lowest-priority interrupts, we only support
+ * those with single CPU as the destination, e.g. user
+ * configures the interrupts via /proc/irq or uses
+ * irqbalance to make the interrupts single-CPU.
+ */
+static int
+get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
+ struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
+{
+ struct kvm_lapic_irq irq;
+ struct kvm_vcpu *vcpu = NULL;
+
+ kvm_set_msi_irq(kvm, e, &irq);
+
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+ pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
+ __func__, irq.vector);
+ return -1;
+ }
+
+ pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
+ irq.vector);
+ *svm = to_svm(vcpu);
+ vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
+ vcpu_info->vector = irq.vector;
+
+ return 0;
+}
+
+/*
+ * svm_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set)
+{
+ struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_irq_routing_table *irq_rt;
+ int idx, ret = -EINVAL;
+
+ if (!kvm_arch_has_assigned_device(kvm) ||
+ !irq_remapping_cap(IRQ_POSTING_CAP))
+ return 0;
+
+ pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
+ __func__, host_irq, guest_irq, set);
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+ struct vcpu_data vcpu_info;
+ struct vcpu_svm *svm = NULL;
+
+ if (e->type != KVM_IRQ_ROUTING_MSI)
+ continue;
+
+ /**
+ * Here, we setup with legacy mode in the following cases:
+ * 1. When cannot target interrupt to a specific vcpu.
+ * 2. Unsetting posted interrupt.
+ * 3. APIC virtialization is disabled for the vcpu.
+ */
+ if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
+ kvm_vcpu_apicv_active(&svm->vcpu)) {
+ struct amd_iommu_pi_data pi;
+
+ /* Try to enable guest_mode in IRTE */
+ pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
+ pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
+ svm->vcpu.vcpu_id);
+ pi.is_guest_mode = true;
+ pi.vcpu_data = &vcpu_info;
+ ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+ /**
+ * Here, we successfully setting up vcpu affinity in
+ * IOMMU guest mode. Now, we need to store the posted
+ * interrupt information in a per-vcpu ir_list so that
+ * we can reference to them directly when we update vcpu
+ * scheduling information in IOMMU irte.
+ */
+ if (!ret && pi.is_guest_mode)
+ svm_ir_list_add(svm, &pi);
+ } else {
+ /* Use legacy mode in IRTE */
+ struct amd_iommu_pi_data pi;
+
+ /**
+ * Here, pi is used to:
+ * - Tell IOMMU to use legacy mode for this interrupt.
+ * - Retrieve ga_tag of prior interrupt remapping data.
+ */
+ pi.is_guest_mode = false;
+ ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+ /**
+ * Check if the posted interrupt was previously
+ * setup with the guest_mode by checking if the ga_tag
+ * was cached. If so, we need to clean up the per-vcpu
+ * ir_list.
+ */
+ if (!ret && pi.prev_ga_tag) {
+ int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kvm_get_vcpu_by_id(kvm, id);
+ if (vcpu)
+ svm_ir_list_del(to_svm(vcpu), &pi);
+ }
+ }
+
+ if (!ret && svm) {
+ trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
+ host_irq, e->gsi,
+ vcpu_info.vector,
+ vcpu_info.pi_desc_addr, set);
+ }
+
+ if (ret < 0) {
+ pr_err("%s: failed to update PI IRTE\n", __func__);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return ret;
+}
+
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5078,6 +5441,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr,
+ .update_pi_irte = svm_update_pi_irte,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a45d8580f91e..f9939d0722b4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -422,6 +422,7 @@ struct nested_vmx {
struct list_head vmcs02_pool;
int vmcs02_num;
u64 vmcs01_tsc_offset;
+ bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
/*
@@ -435,6 +436,8 @@ struct nested_vmx {
bool pi_pending;
u16 posted_intr_nv;
+ unsigned long *msr_bitmap;
+
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -924,7 +927,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;
@@ -937,6 +939,7 @@ static DEFINE_SPINLOCK(vmx_vpid_lock);
static struct vmcs_config {
int size;
int order;
+ u32 basic_cap;
u32 revision_id;
u32 pin_based_exec_ctrl;
u32 cpu_based_exec_ctrl;
@@ -1213,6 +1216,11 @@ static inline bool cpu_has_vmx_ple(void)
SECONDARY_EXEC_PAUSE_LOOP_EXITING;
}
+static inline bool cpu_has_vmx_basic_inout(void)
+{
+ return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
+}
+
static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
{
return flexpriority_enabled && lapic_in_kernel(vcpu);
@@ -2198,6 +2206,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
new.control) != old.control);
}
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+ vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+ vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
@@ -2256,10 +2270,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* Setup TSC multiplier */
if (kvm_has_tsc_control &&
- vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
- vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
- vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
- }
+ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+ decache_tsc_multiplier(vmx);
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
@@ -2508,7 +2520,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
unsigned long *msr_bitmap;
if (is_guest_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_nested;
+ msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
else if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -2871,6 +2883,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
*pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+ if (cpu_has_vmx_basic_inout())
+ *pdata |= VMX_BASIC_INOUT;
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
@@ -3451,7 +3465,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
return -EIO;
vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->order = get_order(vmcs_config.size);
+ vmcs_conf->order = get_order(vmcs_conf->size);
+ vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
vmcs_conf->revision_id = vmx_msr_low;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
@@ -6103,7 +6118,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
gla_validity = (exit_qualification >> 7) & 0x3;
- if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
+ if (gla_validity == 0x2) {
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
@@ -6363,13 +6378,6 @@ static __init int hardware_setup(void)
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
- if (nested) {
- vmx_msr_bitmap_nested =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_nested)
- goto out5;
- }
-
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
goto out6;
@@ -6392,8 +6400,6 @@ static __init int hardware_setup(void)
memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
- if (nested)
- memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
@@ -6529,9 +6535,6 @@ out8:
out7:
free_page((unsigned long)vmx_vmread_bitmap);
out6:
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6560,6 @@ static __exit void hardware_unsetup(void)
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
free_page((unsigned long)vmx_vmread_bitmap);
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
free_kvm_area();
}
@@ -6734,7 +6735,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
/* TODO: not to reset guest simply here. */
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- pr_warn("kvm: nested vmx abort, indicator %d\n", indicator);
+ pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
}
static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
@@ -6995,16 +6996,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
+ if (cpu_has_vmx_msr_bitmap()) {
+ vmx->nested.msr_bitmap =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx->nested.msr_bitmap)
+ goto out_msr_bitmap;
+ }
+
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
- return -ENOMEM;
+ goto out_cached_vmcs12;
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
- if (!shadow_vmcs) {
- kfree(vmx->nested.cached_vmcs12);
- return -ENOMEM;
- }
+ if (!shadow_vmcs)
+ goto out_shadow_vmcs;
/* mark vmcs as shadow */
shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */
@@ -7016,7 +7022,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
vmx->nested.vmcs02_num = 0;
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
vmx->nested.vmxon = true;
@@ -7024,6 +7030,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
return 1;
+
+out_shadow_vmcs:
+ kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+ return -ENOMEM;
}
/*
@@ -7098,6 +7113,10 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
+ if (vmx->nested.msr_bitmap) {
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+ vmx->nested.msr_bitmap = NULL;
+ }
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8438,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
u32 sec_exec_control;
+ /* Postpone execution until vmcs01 is the current VMCS. */
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+ return;
+ }
+
/*
* There is not point to enable virtualize x2apic without enable
* apicv
@@ -9472,8 +9497,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
{
int msr;
struct page *page;
- unsigned long *msr_bitmap;
+ unsigned long *msr_bitmap_l1;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ /* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;
@@ -9482,63 +9509,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
WARN_ON(1);
return false;
}
- msr_bitmap = (unsigned long *)kmap(page);
- if (!msr_bitmap) {
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+ if (!msr_bitmap_l1) {
nested_release_page_clean(page);
WARN_ON(1);
return false;
}
+ memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
if (nested_cpu_has_apic_reg_virt(vmcs12))
for (msr = 0x800; msr <= 0x8ff; msr++)
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
msr, MSR_TYPE_R);
- /* TPR is allowed */
- nested_vmx_disable_intercept_for_msr(msr_bitmap,
- vmx_msr_bitmap_nested,
+
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_TASKPRI >> 4),
MSR_TYPE_R | MSR_TYPE_W);
+
if (nested_cpu_has_vid(vmcs12)) {
- /* EOI and self-IPI are allowed */
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_EOI >> 4),
MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
MSR_TYPE_W);
}
- } else {
- /*
- * Enable reading intercept of all the x2apic
- * MSRs. We should not rely on vmcs12 to do any
- * optimizations here, it may have been modified
- * by L1.
- */
- for (msr = 0x800; msr <= 0x8ff; msr++)
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- msr,
- MSR_TYPE_R);
-
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
}
kunmap(page);
nested_release_page_clean(page);
@@ -9606,7 +9607,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
maxphyaddr = cpuid_maxphyaddr(vcpu);
if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
(addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
addr_field, maxphyaddr, count, addr);
return -EINVAL;
@@ -9679,13 +9680,13 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
for (i = 0; i < count; i++) {
if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
&e, sizeof(e))) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot read MSR entry (%u, 0x%08llx)\n",
__func__, i, gpa + i * sizeof(e));
goto fail;
}
if (nested_vmx_load_msr_check(vcpu, &e)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s check failed (%u, 0x%x, 0x%x)\n",
__func__, i, e.index, e.reserved);
goto fail;
@@ -9693,7 +9694,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
msr.index = e.index;
msr.data = e.value;
if (kvm_set_msr(vcpu, &msr)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, e.value);
goto fail;
@@ -9714,13 +9715,13 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
if (kvm_vcpu_read_guest(vcpu,
gpa + i * sizeof(e),
&e, 2 * sizeof(u32))) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot read MSR entry (%u, 0x%08llx)\n",
__func__, i, gpa + i * sizeof(e));
return -EINVAL;
}
if (nested_vmx_store_msr_check(vcpu, &e)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s check failed (%u, 0x%x, 0x%x)\n",
__func__, i, e.index, e.reserved);
return -EINVAL;
@@ -9728,7 +9729,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
msr_info.host_initiated = false;
msr_info.index = e.index;
if (kvm_get_msr(vcpu, &msr_info)) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot read MSR (%u, 0x%x)\n",
__func__, i, e.index);
return -EINVAL;
@@ -9737,7 +9738,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
gpa + i * sizeof(e) +
offsetof(struct vmx_msr_entry, value),
&msr_info.data, sizeof(msr_info.data))) {
- pr_warn_ratelimited(
+ pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, msr_info.data);
return -EINVAL;
@@ -9957,10 +9958,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
if (cpu_has_vmx_msr_bitmap() &&
- exec_control & CPU_BASED_USE_MSR_BITMAPS) {
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
- /* MSR_BITMAP will be set by following vmx_set_efer. */
- } else
+ exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+ nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+ ; /* MSR_BITMAP will be set by following vmx_set_efer. */
+ else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
/*
@@ -10011,6 +10012,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
else
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
if (enable_vpid) {
/*
@@ -10506,6 +10509,9 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
}
+ if (nested_cpu_has_ept(vmcs12))
+ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+
if (nested_cpu_has_vid(vmcs12))
vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
@@ -10767,6 +10773,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
else
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
+
+ if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+ vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+ vmx_set_virtual_x2apic_mode(vcpu,
+ vcpu->arch.apic_base & X2APIC_ENABLE);
+ }
/* This is needed for same reason as it was needed in prepare_vmcs02 */
vmx->host_rsp = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19f9f9e05c2a..57ffe7893104 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6700,7 +6700,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_put_guest_xcr0(vcpu);
- /* Interrupt is enabled by handle_external_intr() */
kvm_x86_ops->handle_external_intr(vcpu);
++vcpu->stat.exits;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 96de97a46079..a7aa0e76eebd 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -137,6 +137,7 @@ struct iommu_dev_data {
bool pri_tlp; /* PASID TLB required for
PPR completions */
u32 errata; /* Bitmap for errata to apply */
+ bool use_vapic; /* Enable device to use vapic mode */
};
/*
@@ -707,14 +708,74 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu)
}
}
+#ifdef CONFIG_IRQ_REMAP
+static int (*iommu_ga_log_notifier)(u32);
+
+int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
+{
+ iommu_ga_log_notifier = notifier;
+
+ return 0;
+}
+EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
+
+static void iommu_poll_ga_log(struct amd_iommu *iommu)
+{
+ u32 head, tail, cnt = 0;
+
+ if (iommu->ga_log == NULL)
+ return;
+
+ head = readl(iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
+
+ while (head != tail) {
+ volatile u64 *raw;
+ u64 log_entry;
+
+ raw = (u64 *)(iommu->ga_log + head);
+ cnt++;
+
+ /* Avoid memcpy function-call overhead */
+ log_entry = *raw;
+
+ /* Update head pointer of hardware ring-buffer */
+ head = (head + GA_ENTRY_SIZE) % GA_LOG_SIZE;
+ writel(head, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
+
+ /* Handle GA entry */
+ switch (GA_REQ_TYPE(log_entry)) {
+ case GA_GUEST_NR:
+ if (!iommu_ga_log_notifier)
+ break;
+
+ pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n",
+ __func__, GA_DEVID(log_entry),
+ GA_TAG(log_entry));
+
+ if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
+ pr_err("AMD-Vi: GA log notifier failed.\n");
+ break;
+ default:
+ break;
+ }
+ }
+}
+#endif /* CONFIG_IRQ_REMAP */
+
+#define AMD_IOMMU_INT_MASK \
+ (MMIO_STATUS_EVT_INT_MASK | \
+ MMIO_STATUS_PPR_INT_MASK | \
+ MMIO_STATUS_GALOG_INT_MASK)
+
irqreturn_t amd_iommu_int_thread(int irq, void *data)
{
struct amd_iommu *iommu = (struct amd_iommu *) data;
u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
- while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) {
- /* Enable EVT and PPR interrupts again */
- writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK),
+ while (status & AMD_IOMMU_INT_MASK) {
+ /* Enable EVT and PPR and GA interrupts again */
+ writel(AMD_IOMMU_INT_MASK,
iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & MMIO_STATUS_EVT_INT_MASK) {
@@ -727,6 +788,13 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
iommu_poll_ppr_log(iommu);
}
+#ifdef CONFIG_IRQ_REMAP
+ if (status & MMIO_STATUS_GALOG_INT_MASK) {
+ pr_devel("AMD-Vi: Processing IOMMU GA Log\n");
+ iommu_poll_ga_log(iommu);
+ }
+#endif
+
/*
* Hardware bug: ERBT1312
* When re-enabling interrupt (by writing 1
@@ -2948,6 +3016,12 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
if (!iommu)
return;
+#ifdef CONFIG_IRQ_REMAP
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+ (dom->type == IOMMU_DOMAIN_UNMANAGED))
+ dev_data->use_vapic = 0;
+#endif
+
iommu_completion_wait(iommu);
}
@@ -2973,6 +3047,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
ret = attach_device(dev, domain);
+#ifdef CONFIG_IRQ_REMAP
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
+ if (dom->type == IOMMU_DOMAIN_UNMANAGED)
+ dev_data->use_vapic = 1;
+ else
+ dev_data->use_vapic = 0;
+ }
+#endif
+
iommu_completion_wait(iommu);
return ret;
@@ -3511,34 +3594,6 @@ EXPORT_SYMBOL(amd_iommu_device_info);
*
*****************************************************************************/
-union irte {
- u32 val;
- struct {
- u32 valid : 1,
- no_fault : 1,
- int_type : 3,
- rq_eoi : 1,
- dm : 1,
- rsvd_1 : 1,
- destination : 8,
- vector : 8,
- rsvd_2 : 8;
- } fields;
-};
-
-struct irq_2_irte {
- u16 devid; /* Device ID for IRTE table */
- u16 index; /* Index into IRTE table*/
-};
-
-struct amd_ir_data {
- struct irq_2_irte irq_2_irte;
- union irte irte_entry;
- union {
- struct msi_msg msi_entry;
- };
-};
-
static struct irq_chip amd_ir_chip;
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
@@ -3560,8 +3615,6 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
amd_iommu_dev_table[devid].data[2] = dte;
}
-#define IRTE_ALLOCATED (~1U)
-
static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
{
struct irq_remap_table *table = NULL;
@@ -3607,13 +3660,18 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
goto out;
}
- memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32));
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ memset(table->table, 0,
+ MAX_IRQS_PER_TABLE * sizeof(u32));
+ else
+ memset(table->table, 0,
+ (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
if (ioapic) {
int i;
for (i = 0; i < 32; ++i)
- table->table[i] = IRTE_ALLOCATED;
+ iommu->irte_ops->set_allocated(table, i);
}
irq_lookup_table[devid] = table;
@@ -3639,6 +3697,10 @@ static int alloc_irq_index(u16 devid, int count)
struct irq_remap_table *table;
unsigned long flags;
int index, c;
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+ if (!iommu)
+ return -ENODEV;
table = get_irq_table(devid, false);
if (!table)
@@ -3650,14 +3712,14 @@ static int alloc_irq_index(u16 devid, int count)
for (c = 0, index = table->min_index;
index < MAX_IRQS_PER_TABLE;
++index) {
- if (table->table[index] == 0)
+ if (!iommu->irte_ops->is_allocated(table, index))
c += 1;
else
c = 0;
if (c == count) {
for (; c != 0; --c)
- table->table[index - c + 1] = IRTE_ALLOCATED;
+ iommu->irte_ops->set_allocated(table, index - c + 1);
index -= count - 1;
goto out;
@@ -3672,7 +3734,42 @@ out:
return index;
}
-static int modify_irte(u16 devid, int index, union irte irte)
+static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
+ struct amd_ir_data *data)
+{
+ struct irq_remap_table *table;
+ struct amd_iommu *iommu;
+ unsigned long flags;
+ struct irte_ga *entry;
+
+ iommu = amd_iommu_rlookup_table[devid];
+ if (iommu == NULL)
+ return -EINVAL;
+
+ table = get_irq_table(devid, false);
+ if (!table)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&table->lock, flags);
+
+ entry = (struct irte_ga *)table->table;
+ entry = &entry[index];
+ entry->lo.fields_remap.valid = 0;
+ entry->hi.val = irte->hi.val;
+ entry->lo.val = irte->lo.val;
+ entry->lo.fields_remap.valid = 1;
+ if (data)
+ data->ref = entry;
+
+ spin_unlock_irqrestore(&table->lock, flags);
+
+ iommu_flush_irt(iommu, devid);
+ iommu_completion_wait(iommu);
+
+ return 0;
+}
+
+static int modify_irte(u16 devid, int index, union irte *irte)
{
struct irq_remap_table *table;
struct amd_iommu *iommu;
@@ -3687,7 +3784,7 @@ static int modify_irte(u16 devid, int index, union irte irte)
return -ENOMEM;
spin_lock_irqsave(&table->lock, flags);
- table->table[index] = irte.val;
+ table->table[index] = irte->val;
spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
@@ -3711,13 +3808,146 @@ static void free_irte(u16 devid, int index)
return;
spin_lock_irqsave(&table->lock, flags);
- table->table[index] = 0;
+ iommu->irte_ops->clear_allocated(table, index);
spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
}
+static void irte_prepare(void *entry,
+ u32 delivery_mode, u32 dest_mode,
+ u8 vector, u32 dest_apicid, int devid)
+{
+ union irte *irte = (union irte *) entry;
+
+ irte->val = 0;
+ irte->fields.vector = vector;
+ irte->fields.int_type = delivery_mode;
+ irte->fields.destination = dest_apicid;
+ irte->fields.dm = dest_mode;
+ irte->fields.valid = 1;
+}
+
+static void irte_ga_prepare(void *entry,
+ u32 delivery_mode, u32 dest_mode,
+ u8 vector, u32 dest_apicid, int devid)
+{
+ struct irte_ga *irte = (struct irte_ga *) entry;
+ struct iommu_dev_data *dev_data = search_dev_data(devid);
+
+ irte->lo.val = 0;
+ irte->hi.val = 0;
+ irte->lo.fields_remap.guest_mode = dev_data ? dev_data->use_vapic : 0;
+ irte->lo.fields_remap.int_type = delivery_mode;
+ irte->lo.fields_remap.dm = dest_mode;
+ irte->hi.fields.vector = vector;
+ irte->lo.fields_remap.destination = dest_apicid;
+ irte->lo.fields_remap.valid = 1;
+}
+
+static void irte_activate(void *entry, u16 devid, u16 index)
+{
+ union irte *irte = (union irte *) entry;
+
+ irte->fields.valid = 1;
+ modify_irte(devid, index, irte);
+}
+
+static void irte_ga_activate(void *entry, u16 devid, u16 index)
+{
+ struct irte_ga *irte = (struct irte_ga *) entry;
+
+ irte->lo.fields_remap.valid = 1;
+ modify_irte_ga(devid, index, irte, NULL);
+}
+
+static void irte_deactivate(void *entry, u16 devid, u16 index)
+{
+ union irte *irte = (union irte *) entry;
+
+ irte->fields.valid = 0;
+ modify_irte(devid, index, irte);
+}
+
+static void irte_ga_deactivate(void *entry, u16 devid, u16 index)
+{
+ struct irte_ga *irte = (struct irte_ga *) entry;
+
+ irte->lo.fields_remap.valid = 0;
+ modify_irte_ga(devid, index, irte, NULL);
+}
+
+static void irte_set_affinity(void *entry, u16 devid, u16 index,
+ u8 vector, u32 dest_apicid)
+{
+ union irte *irte = (union irte *) entry;
+
+ irte->fields.vector = vector;
+ irte->fields.destination = dest_apicid;
+ modify_irte(devid, index, irte);
+}
+
+static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
+ u8 vector, u32 dest_apicid)
+{
+ struct irte_ga *irte = (struct irte_ga *) entry;
+ struct iommu_dev_data *dev_data = search_dev_data(devid);
+
+ if (!dev_data || !dev_data->use_vapic) {
+ irte->hi.fields.vector = vector;
+ irte->lo.fields_remap.destination = dest_apicid;
+ irte->lo.fields_remap.guest_mode = 0;
+ modify_irte_ga(devid, index, irte, NULL);
+ }
+}
+
+#define IRTE_ALLOCATED (~1U)
+static void irte_set_allocated(struct irq_remap_table *table, int index)
+{
+ table->table[index] = IRTE_ALLOCATED;
+}
+
+static void irte_ga_set_allocated(struct irq_remap_table *table, int index)
+{
+ struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *irte = &ptr[index];
+
+ memset(&irte->lo.val, 0, sizeof(u64));
+ memset(&irte->hi.val, 0, sizeof(u64));
+ irte->hi.fields.vector = 0xff;
+}
+
+static bool irte_is_allocated(struct irq_remap_table *table, int index)
+{
+ union irte *ptr = (union irte *)table->table;
+ union irte *irte = &ptr[index];
+
+ return irte->val != 0;
+}
+
+static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)
+{
+ struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *irte = &ptr[index];
+
+ return irte->hi.fields.vector != 0;
+}
+
+static void irte_clear_allocated(struct irq_remap_table *table, int index)
+{
+ table->table[index] = 0;
+}
+
+static void irte_ga_clear_allocated(struct irq_remap_table *table, int index)
+{
+ struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *irte = &ptr[index];
+
+ memset(&irte->lo.val, 0, sizeof(u64));
+ memset(&irte->hi.val, 0, sizeof(u64));
+}
+
static int get_devid(struct irq_alloc_info *info)
{
int devid = -1;
@@ -3802,19 +4032,17 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data,
{
struct irq_2_irte *irte_info = &data->irq_2_irte;
struct msi_msg *msg = &data->msi_entry;
- union irte *irte = &data->irte_entry;
struct IO_APIC_route_entry *entry;
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+ if (!iommu)
+ return;
data->irq_2_irte.devid = devid;
data->irq_2_irte.index = index + sub_handle;
-
- /* Setup IRTE for IOMMU */
- irte->val = 0;
- irte->fields.vector = irq_cfg->vector;
- irte->fields.int_type = apic->irq_delivery_mode;
- irte->fields.destination = irq_cfg->dest_apicid;
- irte->fields.dm = apic->irq_dest_mode;
- irte->fields.valid = 1;
+ iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode,
+ apic->irq_dest_mode, irq_cfg->vector,
+ irq_cfg->dest_apicid, devid);
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
@@ -3845,12 +4073,32 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data,
}
}
+struct amd_irte_ops irte_32_ops = {
+ .prepare = irte_prepare,
+ .activate = irte_activate,
+ .deactivate = irte_deactivate,
+ .set_affinity = irte_set_affinity,
+ .set_allocated = irte_set_allocated,
+ .is_allocated = irte_is_allocated,
+ .clear_allocated = irte_clear_allocated,
+};
+
+struct amd_irte_ops irte_128_ops = {
+ .prepare = irte_ga_prepare,
+ .activate = irte_ga_activate,
+ .deactivate = irte_ga_deactivate,
+ .set_affinity = irte_ga_set_affinity,
+ .set_allocated = irte_ga_set_allocated,
+ .is_allocated = irte_ga_is_allocated,
+ .clear_allocated = irte_ga_clear_allocated,
+};
+
static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct irq_alloc_info *info = arg;
struct irq_data *irq_data;
- struct amd_ir_data *data;
+ struct amd_ir_data *data = NULL;
struct irq_cfg *cfg;
int i, ret, devid;
int index = -1;
@@ -3902,6 +4150,16 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (!data)
goto out_free_data;
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ data->entry = kzalloc(sizeof(union irte), GFP_KERNEL);
+ else
+ data->entry = kzalloc(sizeof(struct irte_ga),
+ GFP_KERNEL);
+ if (!data->entry) {
+ kfree(data);
+ goto out_free_data;
+ }
+
irq_data->hwirq = (devid << 16) + i;
irq_data->chip_data = data;
irq_data->chip = &amd_ir_chip;
@@ -3938,6 +4196,7 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
data = irq_data->chip_data;
irte_info = &data->irq_2_irte;
free_irte(irte_info->devid, irte_info->index);
+ kfree(data->entry);
kfree(data);
}
}
@@ -3949,8 +4208,11 @@ static void irq_remapping_activate(struct irq_domain *domain,
{
struct amd_ir_data *data = irq_data->chip_data;
struct irq_2_irte *irte_info = &data->irq_2_irte;
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
- modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
+ if (iommu)
+ iommu->irte_ops->activate(data->entry, irte_info->devid,
+ irte_info->index);
}
static void irq_remapping_deactivate(struct irq_domain *domain,
@@ -3958,10 +4220,11 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
{
struct amd_ir_data *data = irq_data->chip_data;
struct irq_2_irte *irte_info = &data->irq_2_irte;
- union irte entry;
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
- entry.val = 0;
- modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
+ if (iommu)
+ iommu->irte_ops->deactivate(data->entry, irte_info->devid,
+ irte_info->index);
}
static struct irq_domain_ops amd_ir_domain_ops = {
@@ -3971,6 +4234,70 @@ static struct irq_domain_ops amd_ir_domain_ops = {
.deactivate = irq_remapping_deactivate,
};
+static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
+{
+ struct amd_iommu *iommu;
+ struct amd_iommu_pi_data *pi_data = vcpu_info;
+ struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
+ struct amd_ir_data *ir_data = data->chip_data;
+ struct irte_ga *irte = (struct irte_ga *) ir_data->entry;
+ struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
+ struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
+
+ /* Note:
+ * This device has never been set up for guest mode.
+ * we should not modify the IRTE
+ */
+ if (!dev_data || !dev_data->use_vapic)
+ return 0;
+
+ pi_data->ir_data = ir_data;
+
+ /* Note:
+ * SVM tries to set up for VAPIC mode, but we are in
+ * legacy mode. So, we force legacy mode instead.
+ */
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
+ pr_debug("AMD-Vi: %s: Fall back to using intr legacy remap\n",
+ __func__);
+ pi_data->is_guest_mode = false;
+ }
+
+ iommu = amd_iommu_rlookup_table[irte_info->devid];
+ if (iommu == NULL)
+ return -EINVAL;
+
+ pi_data->prev_ga_tag = ir_data->cached_ga_tag;
+ if (pi_data->is_guest_mode) {
+ /* Setting */
+ irte->hi.fields.ga_root_ptr = (pi_data->base >> 12);
+ irte->hi.fields.vector = vcpu_pi_info->vector;
+ irte->lo.fields_vapic.guest_mode = 1;
+ irte->lo.fields_vapic.ga_tag = pi_data->ga_tag;
+
+ ir_data->cached_ga_tag = pi_data->ga_tag;
+ } else {
+ /* Un-Setting */
+ struct irq_cfg *cfg = irqd_cfg(data);
+
+ irte->hi.val = 0;
+ irte->lo.val = 0;
+ irte->hi.fields.vector = cfg->vector;
+ irte->lo.fields_remap.guest_mode = 0;
+ irte->lo.fields_remap.destination = cfg->dest_apicid;
+ irte->lo.fields_remap.int_type = apic->irq_delivery_mode;
+ irte->lo.fields_remap.dm = apic->irq_dest_mode;
+
+ /*
+ * This communicates the ga_tag back to the caller
+ * so that it can do all the necessary clean up.
+ */
+ ir_data->cached_ga_tag = 0;
+ }
+
+ return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
+}
+
static int amd_ir_set_affinity(struct irq_data *data,
const struct cpumask *mask, bool force)
{
@@ -3978,8 +4305,12 @@ static int amd_ir_set_affinity(struct irq_data *data,
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct irq_cfg *cfg = irqd_cfg(data);
struct irq_data *parent = data->parent_data;
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
int ret;
+ if (!iommu)
+ return -ENODEV;
+
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
@@ -3988,9 +4319,8 @@ static int amd_ir_set_affinity(struct irq_data *data,
* Atomically updates the IRTE with the new destination, vector
* and flushes the interrupt entry cache.
*/
- ir_data->irte_entry.fields.vector = cfg->vector;
- ir_data->irte_entry.fields.destination = cfg->dest_apicid;
- modify_irte(irte_info->devid, irte_info->index, ir_data->irte_entry);
+ iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
+ irte_info->index, cfg->vector, cfg->dest_apicid);
/*
* After this point, all the interrupts will start arriving
@@ -4012,6 +4342,7 @@ static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
static struct irq_chip amd_ir_chip = {
.irq_ack = ir_ack_apic_edge,
.irq_set_affinity = amd_ir_set_affinity,
+ .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity,
.irq_compose_msi_msg = ir_compose_msi_msg,
};
@@ -4026,4 +4357,43 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
return 0;
}
+
+int amd_iommu_update_ga(int cpu, bool is_run, void *data)
+{
+ unsigned long flags;
+ struct amd_iommu *iommu;
+ struct irq_remap_table *irt;
+ struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+ int devid = ir_data->irq_2_irte.devid;
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+ struct irte_ga *ref = (struct irte_ga *) ir_data->ref;
+
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+ !ref || !entry || !entry->lo.fields_vapic.guest_mode)
+ return 0;
+
+ iommu = amd_iommu_rlookup_table[devid];
+ if (!iommu)
+ return -ENODEV;
+
+ irt = get_irq_table(devid, false);
+ if (!irt)
+ return -ENODEV;
+
+ spin_lock_irqsave(&irt->lock, flags);
+
+ if (ref->lo.fields_vapic.guest_mode) {
+ if (cpu >= 0)
+ ref->lo.fields_vapic.destination = cpu;
+ ref->lo.fields_vapic.is_run = is_run;
+ barrier();
+ }
+
+ spin_unlock_irqrestore(&irt->lock, flags);
+
+ iommu_flush_irt(iommu, devid);
+ iommu_completion_wait(iommu);
+ return 0;
+}
+EXPORT_SYMBOL(amd_iommu_update_ga);
#endif
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 59741ead7e15..cd1713631a4a 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -84,6 +84,7 @@
#define ACPI_DEVFLAG_LINT1 0x80
#define ACPI_DEVFLAG_ATSDIS 0x10000000
+#define LOOP_TIMEOUT 100000
/*
* ACPI table definitions
*
@@ -145,6 +146,8 @@ struct ivmd_header {
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
+int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
+
static bool amd_iommu_detected;
static bool __initdata amd_iommu_disabled;
static int amd_iommu_target_ivhd_type;
@@ -386,6 +389,10 @@ static void iommu_disable(struct amd_iommu *iommu)
iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+ /* Disable IOMMU GA_LOG */
+ iommu_feature_disable(iommu, CONTROL_GALOG_EN);
+ iommu_feature_disable(iommu, CONTROL_GAINT_EN);
+
/* Disable IOMMU hardware itself */
iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
}
@@ -671,6 +678,99 @@ static void __init free_ppr_log(struct amd_iommu *iommu)
free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
}
+static void free_ga_log(struct amd_iommu *iommu)
+{
+#ifdef CONFIG_IRQ_REMAP
+ if (iommu->ga_log)
+ free_pages((unsigned long)iommu->ga_log,
+ get_order(GA_LOG_SIZE));
+ if (iommu->ga_log_tail)
+ free_pages((unsigned long)iommu->ga_log_tail,
+ get_order(8));
+#endif
+}
+
+static int iommu_ga_log_enable(struct amd_iommu *iommu)
+{
+#ifdef CONFIG_IRQ_REMAP
+ u32 status, i;
+
+ if (!iommu->ga_log)
+ return -EINVAL;
+
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ /* Check if already running */
+ if (status & (MMIO_STATUS_GALOG_RUN_MASK))
+ return 0;
+
+ iommu_feature_enable(iommu, CONTROL_GAINT_EN);
+ iommu_feature_enable(iommu, CONTROL_GALOG_EN);
+
+ for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ if (status & (MMIO_STATUS_GALOG_RUN_MASK))
+ break;
+ }
+
+ if (i >= LOOP_TIMEOUT)
+ return -EINVAL;
+#endif /* CONFIG_IRQ_REMAP */
+ return 0;
+}
+
+#ifdef CONFIG_IRQ_REMAP
+static int iommu_init_ga_log(struct amd_iommu *iommu)
+{
+ u64 entry;
+
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
+ return 0;
+
+ iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(GA_LOG_SIZE));
+ if (!iommu->ga_log)
+ goto err_out;
+
+ iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(8));
+ if (!iommu->ga_log_tail)
+ goto err_out;
+
+ entry = (u64)virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
+ memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
+ &entry, sizeof(entry));
+ entry = ((u64)virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL;
+ memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
+ &entry, sizeof(entry));
+ writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
+
+ return 0;
+err_out:
+ free_ga_log(iommu);
+ return -EINVAL;
+}
+#endif /* CONFIG_IRQ_REMAP */
+
+static int iommu_init_ga(struct amd_iommu *iommu)
+{
+ int ret = 0;
+
+#ifdef CONFIG_IRQ_REMAP
+ /* Note: We have already checked GASup from IVRS table.
+ * Now, we need to make sure that GAMSup is set.
+ */
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+ !iommu_feature(iommu, FEATURE_GAM_VAPIC))
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
+
+ ret = iommu_init_ga_log(iommu);
+#endif /* CONFIG_IRQ_REMAP */
+
+ return ret;
+}
+
static void iommu_enable_gt(struct amd_iommu *iommu)
{
if (!iommu_feature(iommu, FEATURE_GT))
@@ -1144,6 +1244,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu)
free_command_buffer(iommu);
free_event_buffer(iommu);
free_ppr_log(iommu);
+ free_ga_log(iommu);
iommu_unmap_mmio_space(iommu);
}
@@ -1258,6 +1359,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
+ if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
case 0x11:
case 0x40:
@@ -1265,6 +1368,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
+ if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0))
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
default:
return -EINVAL;
@@ -1432,6 +1537,7 @@ static int iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
u32 range, misc, low, high;
+ int ret;
iommu->dev = pci_get_bus_and_slot(PCI_BUS_NUM(iommu->devid),
iommu->devid & 0xff);
@@ -1488,6 +1594,10 @@ static int iommu_init_pci(struct amd_iommu *iommu)
if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
return -ENOMEM;
+ ret = iommu_init_ga(iommu);
+ if (ret)
+ return ret;
+
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
amd_iommu_np_cache = true;
@@ -1545,16 +1655,24 @@ static void print_iommu_info(void)
dev_name(&iommu->dev->dev), iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pr_info("AMD-Vi: Extended features: ");
+ pr_info("AMD-Vi: Extended features (%#llx):\n",
+ iommu->features);
for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
if (iommu_feature(iommu, (1ULL << i)))
pr_cont(" %s", feat_str[i]);
}
+
+ if (iommu->features & FEATURE_GAM_VAPIC)
+ pr_cont(" GA_vAPIC");
+
pr_cont("\n");
}
}
- if (irq_remapping_enabled)
+ if (irq_remapping_enabled) {
pr_info("AMD-Vi: Interrupt remapping enabled\n");
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
+ pr_info("AMD-Vi: virtual APIC enabled\n");
+ }
}
static int __init amd_iommu_init_pci(void)
@@ -1645,6 +1763,8 @@ enable_faults:
if (iommu->ppr_log != NULL)
iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
+ iommu_ga_log_enable(iommu);
+
return 0;
}
@@ -1862,6 +1982,24 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
iommu->stored_addr_lo | 1);
}
+static void iommu_enable_ga(struct amd_iommu *iommu)
+{
+#ifdef CONFIG_IRQ_REMAP
+ switch (amd_iommu_guest_ir) {
+ case AMD_IOMMU_GUEST_IR_VAPIC:
+ iommu_feature_enable(iommu, CONTROL_GAM_EN);
+ /* Fall through */
+ case AMD_IOMMU_GUEST_IR_LEGACY_GA:
+ iommu_feature_enable(iommu, CONTROL_GA_EN);
+ iommu->irte_ops = &irte_128_ops;
+ break;
+ default:
+ iommu->irte_ops = &irte_32_ops;
+ break;
+ }
+#endif
+}
+
/*
* This function finally enables all IOMMUs found in the system after
* they have been initialized
@@ -1877,9 +2015,15 @@ static void early_enable_iommus(void)
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_set_exclusion_range(iommu);
+ iommu_enable_ga(iommu);
iommu_enable(iommu);
iommu_flush_all_caches(iommu);
}
+
+#ifdef CONFIG_IRQ_REMAP
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
+ amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
+#endif
}
static void enable_iommus_v2(void)
@@ -1905,6 +2049,11 @@ static void disable_iommus(void)
for_each_iommu(iommu)
iommu_disable(iommu);
+
+#ifdef CONFIG_IRQ_REMAP
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
+ amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
+#endif
}
/*
@@ -2059,7 +2208,7 @@ static int __init early_amd_iommu_init(void)
struct acpi_table_header *ivrs_base;
acpi_size ivrs_size;
acpi_status status;
- int i, ret = 0;
+ int i, remap_cache_sz, ret = 0;
if (!amd_iommu_detected)
return -ENODEV;
@@ -2157,10 +2306,14 @@ static int __init early_amd_iommu_init(void)
* remapping tables.
*/
ret = -ENOMEM;
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
+ else
+ remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
- MAX_IRQS_PER_TABLE * sizeof(u32),
- IRQ_TABLE_ALIGNMENT,
- 0, NULL);
+ remap_cache_sz,
+ IRQ_TABLE_ALIGNMENT,
+ 0, NULL);
if (!amd_iommu_irq_cache)
goto out;
@@ -2413,6 +2566,21 @@ static int __init parse_amd_iommu_dump(char *str)
return 1;
}
+static int __init parse_amd_iommu_intr(char *str)
+{
+ for (; *str; ++str) {
+ if (strncmp(str, "legacy", 6) == 0) {
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
+ break;
+ }
+ if (strncmp(str, "vapic", 5) == 0) {
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
+ break;
+ }
+ }
+ return 1;
+}
+
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
@@ -2521,6 +2689,7 @@ static int __init parse_ivrs_acpihid(char *str)
__setup("amd_iommu_dump", parse_amd_iommu_dump);
__setup("amd_iommu=", parse_amd_iommu_options);
+__setup("amd_iommu_intr=", parse_amd_iommu_intr);
__setup("ivrs_ioapic", parse_ivrs_ioapic);
__setup("ivrs_hpet", parse_ivrs_hpet);
__setup("ivrs_acpihid", parse_ivrs_acpihid);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 0bd9eb374462..faa3b4895cf0 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -38,6 +38,7 @@ extern int amd_iommu_enable(void);
extern void amd_iommu_disable(void);
extern int amd_iommu_reenable(int);
extern int amd_iommu_enable_faulting(void);
+extern int amd_iommu_guest_ir;
/* IOMMUv2 specific functions */
struct iommu_domain;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index caf5e3822715..fa766eefd590 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/mutex.h>
+#include <linux/msi.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
@@ -69,6 +70,8 @@
#define MMIO_EXCL_LIMIT_OFFSET 0x0028
#define MMIO_EXT_FEATURES 0x0030
#define MMIO_PPR_LOG_OFFSET 0x0038
+#define MMIO_GA_LOG_BASE_OFFSET 0x00e0
+#define MMIO_GA_LOG_TAIL_OFFSET 0x00e8
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
@@ -76,6 +79,8 @@
#define MMIO_STATUS_OFFSET 0x2020
#define MMIO_PPR_HEAD_OFFSET 0x2030
#define MMIO_PPR_TAIL_OFFSET 0x2038
+#define MMIO_GA_HEAD_OFFSET 0x2040
+#define MMIO_GA_TAIL_OFFSET 0x2048
#define MMIO_CNTR_CONF_OFFSET 0x4000
#define MMIO_CNTR_REG_OFFSET 0x40000
#define MMIO_REG_END_OFFSET 0x80000
@@ -92,6 +97,7 @@
#define FEATURE_GA (1ULL<<7)
#define FEATURE_HE (1ULL<<8)
#define FEATURE_PC (1ULL<<9)
+#define FEATURE_GAM_VAPIC (1ULL<<21)
#define FEATURE_PASID_SHIFT 32
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
@@ -110,6 +116,9 @@
#define MMIO_STATUS_EVT_INT_MASK (1 << 1)
#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
+#define MMIO_STATUS_GALOG_RUN_MASK (1 << 8)
+#define MMIO_STATUS_GALOG_OVERFLOW_MASK (1 << 9)
+#define MMIO_STATUS_GALOG_INT_MASK (1 << 10)
/* event logging constants */
#define EVENT_ENTRY_SIZE 0x10
@@ -146,6 +155,10 @@
#define CONTROL_PPFINT_EN 0x0eULL
#define CONTROL_PPR_EN 0x0fULL
#define CONTROL_GT_EN 0x10ULL
+#define CONTROL_GA_EN 0x11ULL
+#define CONTROL_GAM_EN 0x19ULL
+#define CONTROL_GALOG_EN 0x1CULL
+#define CONTROL_GAINT_EN 0x1DULL
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_NONE 0
@@ -224,6 +237,19 @@
#define PPR_REQ_FAULT 0x01
+/* Constants for GA Log handling */
+#define GA_LOG_ENTRIES 512
+#define GA_LOG_SIZE_SHIFT 56
+#define GA_LOG_SIZE_512 (0x8ULL << GA_LOG_SIZE_SHIFT)
+#define GA_ENTRY_SIZE 8
+#define GA_LOG_SIZE (GA_ENTRY_SIZE * GA_LOG_ENTRIES)
+
+#define GA_TAG(x) (u32)(x & 0xffffffffULL)
+#define GA_DEVID(x) (u16)(((x) >> 32) & 0xffffULL)
+#define GA_REQ_TYPE(x) (((x) >> 60) & 0xfULL)
+
+#define GA_GUEST_NR 0x1
+
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
@@ -329,6 +355,12 @@
#define IOMMU_CAP_NPCACHE 26
#define IOMMU_CAP_EFR 27
+/* IOMMU Feature Reporting Field (for IVHD type 10h */
+#define IOMMU_FEAT_GASUP_SHIFT 6
+
+/* IOMMU Extended Feature Register (EFR) */
+#define IOMMU_EFR_GASUP_SHIFT 7
+
#define MAX_DOMAIN_ID 65536
/* Protection domain flags */
@@ -400,6 +432,7 @@ struct amd_iommu_fault {
struct iommu_domain;
struct irq_domain;
+struct amd_irte_ops;
/*
* This structure contains generic data for IOMMU protection domains
@@ -490,6 +523,12 @@ struct amd_iommu {
/* Base of the PPR log, if present */
u8 *ppr_log;
+ /* Base of the GA log, if present */
+ u8 *ga_log;
+
+ /* Tail of the GA log, if present */
+ u8 *ga_log_tail;
+
/* true if interrupts for this IOMMU are already enabled */
bool int_enabled;
@@ -523,6 +562,8 @@ struct amd_iommu {
#ifdef CONFIG_IRQ_REMAP
struct irq_domain *ir_domain;
struct irq_domain *msi_domain;
+
+ struct amd_irte_ops *irte_ops;
#endif
};
@@ -681,4 +722,112 @@ static inline int get_hpet_devid(int id)
return -EINVAL;
}
+enum amd_iommu_intr_mode_type {
+ AMD_IOMMU_GUEST_IR_LEGACY,
+
+ /* This mode is not visible to users. It is used when
+ * we cannot fully enable vAPIC and fallback to only support
+ * legacy interrupt remapping via 128-bit IRTE.
+ */
+ AMD_IOMMU_GUEST_IR_LEGACY_GA,
+ AMD_IOMMU_GUEST_IR_VAPIC,
+};
+
+#define AMD_IOMMU_GUEST_IR_GA(x) (x == AMD_IOMMU_GUEST_IR_VAPIC || \
+ x == AMD_IOMMU_GUEST_IR_LEGACY_GA)
+
+#define AMD_IOMMU_GUEST_IR_VAPIC(x) (x == AMD_IOMMU_GUEST_IR_VAPIC)
+
+union irte {
+ u32 val;
+ struct {
+ u32 valid : 1,
+ no_fault : 1,
+ int_type : 3,
+ rq_eoi : 1,
+ dm : 1,
+ rsvd_1 : 1,
+ destination : 8,
+ vector : 8,
+ rsvd_2 : 8;
+ } fields;
+};
+
+union irte_ga_lo {
+ u64 val;
+
+ /* For int remapping */
+ struct {
+ u64 valid : 1,
+ no_fault : 1,
+ /* ------ */
+ int_type : 3,
+ rq_eoi : 1,
+ dm : 1,
+ /* ------ */
+ guest_mode : 1,
+ destination : 8,
+ rsvd : 48;
+ } fields_remap;
+
+ /* For guest vAPIC */
+ struct {
+ u64 valid : 1,
+ no_fault : 1,
+ /* ------ */
+ ga_log_intr : 1,
+ rsvd1 : 3,
+ is_run : 1,
+ /* ------ */
+ guest_mode : 1,
+ destination : 8,
+ rsvd2 : 16,
+ ga_tag : 32;
+ } fields_vapic;
+};
+
+union irte_ga_hi {
+ u64 val;
+ struct {
+ u64 vector : 8,
+ rsvd_1 : 4,
+ ga_root_ptr : 40,
+ rsvd_2 : 12;
+ } fields;
+};
+
+struct irte_ga {
+ union irte_ga_lo lo;
+ union irte_ga_hi hi;
+};
+
+struct irq_2_irte {
+ u16 devid; /* Device ID for IRTE table */
+ u16 index; /* Index into IRTE table*/
+};
+
+struct amd_ir_data {
+ u32 cached_ga_tag;
+ struct irq_2_irte irq_2_irte;
+ struct msi_msg msi_entry;
+ void *entry; /* Pointer to union irte or struct irte_ga */
+ void *ref; /* Pointer to the actual irte */
+};
+
+struct amd_irte_ops {
+ void (*prepare)(void *, u32, u32, u8, u32, int);
+ void (*activate)(void *, u16, u16);
+ void (*deactivate)(void *, u16, u16);
+ void (*set_affinity)(void *, u16, u16, u8, u32);
+ void *(*get)(struct irq_remap_table *, int);
+ void (*set_allocated)(struct irq_remap_table *, int);
+ bool (*is_allocated)(struct irq_remap_table *, int);
+ void (*clear_allocated)(struct irq_remap_table *, int);
+};
+
+#ifdef CONFIG_IRQ_REMAP
+extern struct amd_irte_ops irte_32_ops;
+extern struct amd_irte_ops irte_128_ops;
+#endif
+
#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 2b08e79f5100..09751d349963 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -22,6 +22,20 @@
#include <linux/types.h>
+/*
+ * This is mainly used to communicate information back-and-forth
+ * between SVM and IOMMU for setting up and tearing down posted
+ * interrupt
+ */
+struct amd_iommu_pi_data {
+ u32 ga_tag;
+ u32 prev_ga_tag;
+ u64 base;
+ bool is_guest_mode;
+ struct vcpu_data *vcpu_data;
+ void *ir_data;
+};
+
#ifdef CONFIG_AMD_IOMMU
struct task_struct;
@@ -168,11 +182,34 @@ typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid);
extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
amd_iommu_invalidate_ctx cb);
-
-#else
+#else /* CONFIG_AMD_IOMMU */
static inline int amd_iommu_detect(void) { return -ENODEV; }
-#endif
+#endif /* CONFIG_AMD_IOMMU */
+
+#if defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP)
+
+/* IOMMU AVIC Function */
+extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
+
+extern int
+amd_iommu_update_ga(int cpu, bool is_run, void *data);
+
+#else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
+
+static inline int
+amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
+{
+ return 0;
+}
+
+static inline int
+amd_iommu_update_ga(int cpu, bool is_run, void *data)
+{
+ return 0;
+}
+
+#endif /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 56b0b7ec66aa..99ac022edc60 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -337,6 +337,7 @@
*/
#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
#define E_ITS_MAPD_DEVICE_OOR 0x010801
#define E_ITS_MAPC_PROCNUM_OOR 0x010902
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 4fde8c7dfcfe..4309b60ebf17 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -31,8 +31,8 @@
#include "trace.h"
static struct timecounter *timecounter;
-static struct workqueue_struct *wqueue;
static unsigned int host_vtimer_irq;
+static u32 host_vtimer_irq_flags;
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
@@ -140,7 +140,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
return HRTIMER_RESTART;
}
- queue_work(wqueue, &timer->expired);
+ schedule_work(&timer->expired);
return HRTIMER_NORESTART;
}
@@ -365,7 +365,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
static void kvm_timer_init_interrupt(void *info)
{
- enable_percpu_irq(host_vtimer_irq, 0);
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
@@ -432,6 +432,14 @@ int kvm_timer_hyp_init(void)
}
host_vtimer_irq = info->virtual_irq;
+ host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
+ if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
+ host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
+ kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+ host_vtimer_irq);
+ host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+ }
+
err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest timer", kvm_get_running_vcpus());
if (err) {
@@ -440,12 +448,6 @@ int kvm_timer_hyp_init(void)
goto out;
}
- wqueue = create_singlethread_workqueue("kvm_arch_timer");
- if (!wqueue) {
- err = -ENOMEM;
- goto out_free;
- }
-
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
@@ -509,7 +511,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
* VCPUs have the enabled variable set, before entering the guest, if
* the arch timers are enabled.
*/
- if (timecounter && wqueue)
+ if (timecounter)
timer->enabled = 1;
return 0;
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 07411cf967b9..4660a7d04eea 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -51,7 +51,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
if (!irq)
- return NULL;
+ return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&irq->lpi_list);
INIT_LIST_HEAD(&irq->ap_list);
@@ -441,39 +441,63 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
* Find the target VCPU and the LPI number for a given devid/eventid pair
* and make this IRQ pending, possibly injecting it.
* Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
*/
-static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
- u32 devid, u32 eventid)
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid)
{
+ struct kvm_vcpu *vcpu;
struct its_itte *itte;
if (!its->enabled)
- return;
+ return -EBUSY;
itte = find_itte(its, devid, eventid);
- /* Triggering an unmapped IRQ gets silently dropped. */
- if (itte && its_is_collection_mapped(itte->collection)) {
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
- if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) {
- spin_lock(&itte->irq->irq_lock);
- itte->irq->pending = true;
- vgic_queue_irq_unlock(kvm, itte->irq);
- }
- }
+ if (!itte || !its_is_collection_mapped(itte->collection))
+ return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+ vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+ if (!vcpu)
+ return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+ if (!vcpu->arch.vgic_cpu.lpis_enabled)
+ return -EBUSY;
+
+ spin_lock(&itte->irq->irq_lock);
+ itte->irq->pending = true;
+ vgic_queue_irq_unlock(kvm, itte->irq);
+
+ return 0;
+}
+
+static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+{
+ struct vgic_io_device *iodev;
+
+ if (dev->ops != &kvm_io_gic_ops)
+ return NULL;
+
+ iodev = container_of(dev, struct vgic_io_device, dev);
+
+ if (iodev->iodev_type != IODEV_ITS)
+ return NULL;
+
+ return iodev;
}
/*
* Queries the KVM IO bus framework to get the ITS pointer from the given
* doorbell address.
* We then call vgic_its_trigger_msi() with the decoded data.
+ * According to the KVM_SIGNAL_MSI API description returns 1 on success.
*/
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
{
u64 address;
struct kvm_io_device *kvm_io_dev;
struct vgic_io_device *iodev;
+ int ret;
if (!vgic_has_its(kvm))
return -ENODEV;
@@ -485,15 +509,28 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
if (!kvm_io_dev)
- return -ENODEV;
+ return -EINVAL;
- iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+ iodev = vgic_get_its_iodev(kvm_io_dev);
+ if (!iodev)
+ return -EINVAL;
mutex_lock(&iodev->its->its_lock);
- vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
+ ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
mutex_unlock(&iodev->its->its_lock);
- return 0;
+ if (ret < 0)
+ return ret;
+
+ /*
+ * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
+ * if the guest has blocked the MSI. So we map any LPI mapping
+ * related error to that.
+ */
+ if (ret)
+ return 0;
+ else
+ return 1;
}
/* Requires the its_lock to be held. */
@@ -502,7 +539,8 @@ static void its_free_itte(struct kvm *kvm, struct its_itte *itte)
list_del(&itte->itte_list);
/* This put matches the get in vgic_add_lpi. */
- vgic_put_irq(kvm, itte->irq);
+ if (itte->irq)
+ vgic_put_irq(kvm, itte->irq);
kfree(itte);
}
@@ -697,6 +735,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
struct its_device *device;
struct its_collection *collection, *new_coll = NULL;
int lpi_nr;
+ struct vgic_irq *irq;
device = find_its_device(its, device_id);
if (!device)
@@ -710,6 +749,10 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
return E_ITS_MAPTI_PHYSICALID_OOR;
+ /* If there is an existing mapping, behavior is UNPREDICTABLE. */
+ if (find_itte(its, device_id, event_id))
+ return 0;
+
collection = find_collection(its, coll_id);
if (!collection) {
int ret = vgic_its_alloc_collection(its, &collection, coll_id);
@@ -718,22 +761,28 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
new_coll = collection;
}
- itte = find_itte(its, device_id, event_id);
+ itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
if (!itte) {
- itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
- if (!itte) {
- if (new_coll)
- vgic_its_free_collection(its, coll_id);
- return -ENOMEM;
- }
-
- itte->event_id = event_id;
- list_add_tail(&itte->itte_list, &device->itt_head);
+ if (new_coll)
+ vgic_its_free_collection(its, coll_id);
+ return -ENOMEM;
}
+ itte->event_id = event_id;
+ list_add_tail(&itte->itte_list, &device->itt_head);
+
itte->collection = collection;
itte->lpi = lpi_nr;
- itte->irq = vgic_add_lpi(kvm, lpi_nr);
+
+ irq = vgic_add_lpi(kvm, lpi_nr);
+ if (IS_ERR(irq)) {
+ if (new_coll)
+ vgic_its_free_collection(its, coll_id);
+ its_free_itte(kvm, itte);
+ return PTR_ERR(irq);
+ }
+ itte->irq = irq;
+
update_affinity_itte(kvm, itte);
/*
@@ -981,9 +1030,7 @@ static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its,
u32 msi_data = its_cmd_get_id(its_cmd);
u64 msi_devid = its_cmd_get_deviceid(its_cmd);
- vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
-
- return 0;
+ return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
}
/*
@@ -1288,13 +1335,13 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
its_sync_lpi_pending_table(vcpu);
}
-static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
{
struct vgic_io_device *iodev = &its->iodev;
int ret;
- if (its->initialized)
- return 0;
+ if (!its->initialized)
+ return -EBUSY;
if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
return -ENXIO;
@@ -1311,9 +1358,6 @@ static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
mutex_unlock(&kvm->slots_lock);
- if (!ret)
- its->initialized = true;
-
return ret;
}
@@ -1435,9 +1479,6 @@ static int vgic_its_set_attr(struct kvm_device *dev,
if (type != KVM_VGIC_ITS_ADDR_TYPE)
return -ENODEV;
- if (its->initialized)
- return -EBUSY;
-
if (copy_from_user(&addr, uaddr, sizeof(addr)))
return -EFAULT;
@@ -1453,7 +1494,9 @@ static int vgic_its_set_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_ARM_VGIC_CTRL_INIT:
- return vgic_its_init_its(dev->kvm, its);
+ its->initialized = true;
+
+ return 0;
}
break;
}
@@ -1498,3 +1541,30 @@ int kvm_vgic_register_its_device(void)
return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
KVM_DEV_TYPE_ARM_VGIC_ITS);
}
+
+/*
+ * Registers all ITSes with the kvm_io_bus framework.
+ * To follow the existing VGIC initialization sequence, this has to be
+ * done as late as possible, just before the first VCPU runs.
+ */
+int vgic_register_its_iodevs(struct kvm *kvm)
+{
+ struct kvm_device *dev;
+ int ret = 0;
+
+ list_for_each_entry(dev, &kvm->devices, vm_node) {
+ if (dev->ops != &kvm_arm_vgic_its_ops)
+ continue;
+
+ ret = vgic_register_its_iodev(kvm, dev->private);
+ if (ret)
+ return ret;
+ /*
+ * We don't need to care about tearing down previously
+ * registered ITSes, as the kvm_io_bus framework removes
+ * them for us if the VM gets destroyed.
+ */
+ }
+
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index ff668e0dd586..90d81811fdda 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -306,16 +306,19 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- u64 propbaser = dist->propbaser;
+ u64 old_propbaser, propbaser;
/* Storing a value with LPIs already enabled is undefined */
if (vgic_cpu->lpis_enabled)
return;
- propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
- propbaser = vgic_sanitise_propbaser(propbaser);
-
- dist->propbaser = propbaser;
+ do {
+ old_propbaser = dist->propbaser;
+ propbaser = old_propbaser;
+ propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
+ propbaser = vgic_sanitise_propbaser(propbaser);
+ } while (cmpxchg64(&dist->propbaser, old_propbaser,
+ propbaser) != old_propbaser);
}
static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
@@ -331,16 +334,19 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
unsigned long val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- u64 pendbaser = vgic_cpu->pendbaser;
+ u64 old_pendbaser, pendbaser;
/* Storing a value with LPIs already enabled is undefined */
if (vgic_cpu->lpis_enabled)
return;
- pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
- pendbaser = vgic_sanitise_pendbaser(pendbaser);
-
- vgic_cpu->pendbaser = pendbaser;
+ do {
+ old_pendbaser = vgic_cpu->pendbaser;
+ pendbaser = old_pendbaser;
+ pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
+ pendbaser = vgic_sanitise_pendbaser(pendbaser);
+ } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
+ pendbaser) != old_pendbaser);
}
/*
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 0506543df38a..9f0dae397d9c 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -289,6 +289,14 @@ int vgic_v3_map_resources(struct kvm *kvm)
goto out;
}
+ if (vgic_has_its(kvm)) {
+ ret = vgic_register_its_iodevs(kvm);
+ if (ret) {
+ kvm_err("Unable to register VGIC ITS MMIO regions\n");
+ goto out;
+ }
+ }
+
dist->ready = true;
out:
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index e7aeac719e09..e83b7fe4baae 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -117,17 +117,17 @@ static void vgic_irq_release(struct kref *ref)
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
- struct vgic_dist *dist;
+ struct vgic_dist *dist = &kvm->arch.vgic;
if (irq->intid < VGIC_MIN_LPI)
return;
- if (!kref_put(&irq->refcount, vgic_irq_release))
+ spin_lock(&dist->lpi_list_lock);
+ if (!kref_put(&irq->refcount, vgic_irq_release)) {
+ spin_unlock(&dist->lpi_list_lock);
return;
+ };
- dist = &kvm->arch.vgic;
-
- spin_lock(&dist->lpi_list_lock);
list_del(&irq->lpi_list);
dist->lpi_list_count--;
spin_unlock(&dist->lpi_list_lock);
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 1d8e21d5c13f..6c4625c46368 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -84,6 +84,7 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu);
int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_register_its_iodevs(struct kvm *kvm);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -140,6 +141,11 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm,
return -ENODEV;
}
+static inline int vgic_register_its_iodevs(struct kvm *kvm)
+{
+ return -ENODEV;
+}
+
static inline bool vgic_has_its(struct kvm *kvm)
{
return false;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index e469b6012471..f397e9b20370 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -42,7 +42,6 @@
#ifdef CONFIG_HAVE_KVM_IRQFD
-static struct workqueue_struct *irqfd_cleanup_wq;
static void
irqfd_inject(struct work_struct *work)
@@ -168,7 +167,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
list_del_init(&irqfd->list);
- queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
+ schedule_work(&irqfd->shutdown);
}
int __attribute__((weak)) kvm_arch_set_irq_inatomic(
@@ -555,7 +554,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
* so that we guarantee there will not be any more interrupts on this
* gsi once this deassign function returns.
*/
- flush_workqueue(irqfd_cleanup_wq);
+ flush_work(&irqfd->shutdown);
return 0;
}
@@ -592,7 +591,7 @@ kvm_irqfd_release(struct kvm *kvm)
* Block until we know all outstanding shutdown jobs have completed
* since we do not take a kvm* reference.
*/
- flush_workqueue(irqfd_cleanup_wq);
+ flush_work(&irqfd->shutdown);
}
@@ -622,23 +621,8 @@ void kvm_irq_routing_update(struct kvm *kvm)
spin_unlock_irq(&kvm->irqfds.lock);
}
-/*
- * create a host-wide workqueue for issuing deferred shutdown requests
- * aggregated from all vm* instances. We need our own isolated single-thread
- * queue to prevent deadlock against flushing the normal work-queue.
- */
-int kvm_irqfd_init(void)
-{
- irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
- if (!irqfd_cleanup_wq)
- return -ENOMEM;
-
- return 0;
-}
-
void kvm_irqfd_exit(void)
{
- destroy_workqueue(irqfd_cleanup_wq);
}
#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 195078225aa5..b3fa12ce1166 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3807,12 +3807,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
* kvm_arch_init makes sure there's at most one caller
* for architectures that support multiple implementations,
* like intel and amd on x86.
- * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
- * conflicts in case kvm is already setup for another implementation.
*/
- r = kvm_irqfd_init();
- if (r)
- goto out_irqfd;
if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
@@ -3894,7 +3889,6 @@ out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
kvm_irqfd_exit();
-out_irqfd:
kvm_arch_exit();
out_fail:
return r;