diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/pmu.c | 8 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 41 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.h | 14 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c | 6 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 6 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 12 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 16 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 22 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 29 |
9 files changed, 111 insertions, 43 deletions
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 6e9c40eea208..69ccce308458 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) continue; type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); @@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, eventsel = data & ARMV8_PMU_EVTYPE_EVENT; /* Software increment event does't need to be backed by a perf event */ - if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && + select_idx != ARMV8_PMU_CYCLE_IDX) return; memset(&attr, 0, sizeof(struct perf_event_attr)); @@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ - attr.config = eventsel; + attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ? + ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; counter = kvm_pmu_get_counter_value(vcpu, select_idx); /* The initial sample period (overflow count) of an event. */ diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index e18b30ddcdce..ebe1b9fa3c4d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -453,17 +453,33 @@ struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) return container_of(dev, struct vgic_io_device, dev); } -static bool check_region(const struct vgic_register_region *region, +static bool check_region(const struct kvm *kvm, + const struct vgic_register_region *region, gpa_t addr, int len) { - if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) - return true; - if ((region->access_flags & VGIC_ACCESS_32bit) && - len == sizeof(u32) && !(addr & 3)) - return true; - if ((region->access_flags & VGIC_ACCESS_64bit) && - len == sizeof(u64) && !(addr & 7)) - return true; + int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + switch (len) { + case sizeof(u8): + flags = VGIC_ACCESS_8bit; + break; + case sizeof(u32): + flags = VGIC_ACCESS_32bit; + break; + case sizeof(u64): + flags = VGIC_ACCESS_64bit; + break; + default: + return false; + } + + if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) { + if (!region->bits_per_irq) + return true; + + /* Do we access a non-allocated IRQ? */ + return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs; + } return false; } @@ -477,7 +493,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); - if (!region || !check_region(region, addr, len)) { + if (!region || !check_region(vcpu->kvm, region, addr, len)) { memset(val, 0, len); return 0; } @@ -510,10 +526,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); - if (!region) - return 0; - - if (!check_region(region, addr, len)) + if (!region || !check_region(vcpu->kvm, region, addr, len)) return 0; switch (iodev->iodev_type) { diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 4c34d39d44a0..84961b4e4422 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -50,15 +50,15 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) /* - * (addr & mask) gives us the byte offset for the INT ID, so we want to - * divide this with 'bytes per irq' to get the INT ID, which is given - * by '(bits) / 8'. But we do this with fixed-point-arithmetic and - * take advantage of the fact that division by a fraction equals - * multiplication with the inverted fraction, and scale up both the - * numerator and denominator with 8 to support at most 64 bits per IRQ: + * (addr & mask) gives us the _byte_ offset for the INT ID. + * We multiply this by 8 the get the _bit_ offset, then divide this by + * the number of bits to learn the actual INT ID. + * But instead of a division (which requires a "long long div" implementation), + * we shift by the binary logarithm of <bits>. + * This assumes that <bits> is a power of two. */ #define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ - 64 / (bits) / 8) + 8 >> ilog2(bits)) /* * Some VGIC registers store per-IRQ information, with a different number diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 0a063af40565..9bab86757fa4 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -50,8 +50,10 @@ void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE); - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); + /* Only SPIs require notification */ + if (vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); } } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 9f0dae397d9c..5c9f9745e6ca 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -41,8 +41,10 @@ void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE); - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); + /* Only SPIs require notification */ + if (vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); } /* diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 2893d5ba523a..6440b56ec90e 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -273,6 +273,18 @@ retry: * no more work for us to do. */ spin_unlock(&irq->irq_lock); + + /* + * We have to kick the VCPU here, because we could be + * queueing an edge-triggered interrupt for which we + * get no EOI maintenance interrupt. In that case, + * while the IRQ is already on the VCPU's AP list, the + * VCPU could have EOI'ed the original interrupt and + * won't see this one until it exits for some other + * reason. + */ + if (vcpu) + kvm_vcpu_kick(vcpu); return false; } diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index db9668869f6f..efeceb0a222d 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -84,12 +84,14 @@ static void async_pf_execute(struct work_struct *work) * mm and might be done in another context, so we must * use FOLL_REMOTE. */ - __get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL, FOLL_REMOTE); + __get_user_pages_unlocked(NULL, mm, addr, 1, NULL, + FOLL_WRITE | FOLL_REMOTE); kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); + apf->vcpu = NULL; spin_unlock(&vcpu->async_pf.lock); /* @@ -112,6 +114,8 @@ static void async_pf_execute(struct work_struct *work) void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) { + spin_lock(&vcpu->async_pf.lock); + /* cancel outstanding work queue item */ while (!list_empty(&vcpu->async_pf.queue)) { struct kvm_async_pf *work = @@ -119,6 +123,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); list_del(&work->queue); + /* + * We know it's present in vcpu->async_pf.done, do + * nothing here. + */ + if (!work->vcpu) + continue; + + spin_unlock(&vcpu->async_pf.lock); #ifdef CONFIG_KVM_ASYNC_PF_SYNC flush_work(&work->work); #else @@ -128,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) kmem_cache_free(async_pf_cache, work); } #endif + spin_lock(&vcpu->async_pf.lock); } - spin_lock(&vcpu->async_pf.lock); while (!list_empty(&vcpu->async_pf.done)) { struct kvm_async_pf *work = list_first_entry(&vcpu->async_pf.done, diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f397e9b20370..a29786dd9522 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -42,6 +42,7 @@ #ifdef CONFIG_HAVE_KVM_IRQFD +static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) @@ -167,7 +168,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) list_del_init(&irqfd->list); - schedule_work(&irqfd->shutdown); + queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } int __attribute__((weak)) kvm_arch_set_irq_inatomic( @@ -554,7 +555,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) * so that we guarantee there will not be any more interrupts on this * gsi once this deassign function returns. */ - flush_work(&irqfd->shutdown); + flush_workqueue(irqfd_cleanup_wq); return 0; } @@ -591,7 +592,7 @@ kvm_irqfd_release(struct kvm *kvm) * Block until we know all outstanding shutdown jobs have completed * since we do not take a kvm* reference. */ - flush_work(&irqfd->shutdown); + flush_workqueue(irqfd_cleanup_wq); } @@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_unlock_irq(&kvm->irqfds.lock); } +/* + * create a host-wide workqueue for issuing deferred shutdown requests + * aggregated from all vm* instances. We need our own isolated + * queue to ease flushing work items when a VM exits. + */ +int kvm_irqfd_init(void) +{ + irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0); + if (!irqfd_cleanup_wq) + return -ENOMEM; + + return 0; +} + void kvm_irqfd_exit(void) { + destroy_workqueue(irqfd_cleanup_wq); } #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 81dfc73d3df3..7f9ee2929cfe 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1346,21 +1346,19 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *w static int get_user_page_nowait(unsigned long start, int write, struct page **page) { - int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; + int flags = FOLL_NOWAIT | FOLL_HWPOISON; if (write) flags |= FOLL_WRITE; - return __get_user_pages(current, current->mm, start, 1, flags, page, - NULL, NULL); + return get_user_pages(start, 1, flags, page, NULL); } static inline int check_user_page_hwpoison(unsigned long addr) { - int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; + int rc, flags = FOLL_HWPOISON | FOLL_WRITE; - rc = __get_user_pages(current, current->mm, addr, 1, - flags, NULL, NULL, NULL); + rc = get_user_pages(addr, 1, flags, NULL, NULL); return rc == -EHWPOISON; } @@ -1416,10 +1414,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, down_read(¤t->mm->mmap_sem); npages = get_user_page_nowait(addr, write_fault, page); up_read(¤t->mm->mmap_sem); - } else + } else { + unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON; + + if (write_fault) + flags |= FOLL_WRITE; + npages = __get_user_pages_unlocked(current, current->mm, addr, 1, - write_fault, 0, page, - FOLL_TOUCH|FOLL_HWPOISON); + page, flags); + } if (npages != 1) return npages; @@ -2886,10 +2889,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm, ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { - ops->destroy(dev); mutex_lock(&kvm->lock); list_del(&dev->vm_node); mutex_unlock(&kvm->lock); + ops->destroy(dev); return ret; } @@ -3841,7 +3844,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, * kvm_arch_init makes sure there's at most one caller * for architectures that support multiple implementations, * like intel and amd on x86. + * kvm_arch_init must be called before kvm_irqfd_init to avoid creating + * conflicts in case kvm is already setup for another implementation. */ + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { r = -ENOMEM; @@ -3923,6 +3931,7 @@ out_free_0a: free_cpumask_var(cpus_hardware_enabled); out_free_0: kvm_irqfd_exit(); +out_irqfd: kvm_arch_exit(); out_fail: return r; |