Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c | 330
1 file changed, 240 insertions(+), 90 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b50dbe269f4b..3f6d450355f0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -155,6 +155,8 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
+static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
+
__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
@@ -189,16 +191,6 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
return true;
}
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn)
-{
- struct page *page = pfn_to_page(pfn);
-
- if (!PageTransCompoundMap(page))
- return false;
-
- return is_transparent_hugepage(compound_head(page));
-}
-
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
@@ -245,11 +237,8 @@ static void ack_flush(void *_completed)
{
}
-static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+static inline bool kvm_kick_many_cpus(struct cpumask *cpus, bool wait)
{
- if (unlikely(!cpus))
- cpus = cpu_online_mask;
-
if (cpumask_empty(cpus))
return false;
@@ -257,33 +246,55 @@ static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
return true;
}
+static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ unsigned int req, struct cpumask *tmp,
+ int current_cpu)
+{
+ int cpu;
+
+ kvm_make_request(req, vcpu);
+
+ if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+ return;
+
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point
+ * after kvm_request_needs_ipi(), which could result in sending an IPI
+ * to the previous pCPU. But, that's OK because the purpose of the IPI
+ * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is
+ * satisfied if the vCPU migrates. Entering READING_SHADOW_PAGE_TABLES
+ * after this point is also OK, as the requirement is only that KVM wait
+ * for vCPUs that were reading SPTEs _before_ any changes were
+ * finalized. See kvm_vcpu_kick() for more details on handling requests.
+ */
+ if (kvm_request_needs_ipi(vcpu, req)) {
+ cpu = READ_ONCE(vcpu->cpu);
+ if (cpu != -1 && cpu != current_cpu)
+ __cpumask_set_cpu(cpu, tmp);
+ }
+}
+
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
- struct kvm_vcpu *except,
- unsigned long *vcpu_bitmap, cpumask_var_t tmp)
+ unsigned long *vcpu_bitmap)
{
- int i, cpu, me;
struct kvm_vcpu *vcpu;
+ struct cpumask *cpus;
+ int i, me;
bool called;
me = get_cpu();
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) ||
- vcpu == except)
- continue;
-
- kvm_make_request(req, vcpu);
- cpu = vcpu->cpu;
+ cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
+ cpumask_clear(cpus);
- if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+ for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
+ vcpu = kvm_get_vcpu(kvm, i);
+ if (!vcpu)
continue;
-
- if (tmp != NULL && cpu != -1 && cpu != me &&
- kvm_request_needs_ipi(vcpu, req))
- __cpumask_set_cpu(cpu, tmp);
+ kvm_make_vcpu_request(kvm, vcpu, req, cpus, me);
}
- called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
+ called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
put_cpu();
return called;
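A minimal caller sketch (illustrative only, not part of this diff; the helper name is made up) of the simplified interface, which now takes just the request and a bitmap of vCPU indices while the per-CPU kick mask is managed internally:

static void example_kick_vcpu_subset(struct kvm *kvm, unsigned long *vcpu_bitmap)
{
	/*
	 * KVM_REQ_TLB_FLUSH carries KVM_REQUEST_WAIT, so the call does not
	 * return until the kicked pCPUs have acked the IPI.
	 */
	kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH, vcpu_bitmap);
}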
@@ -292,14 +303,25 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except)
{
- cpumask_var_t cpus;
+ struct kvm_vcpu *vcpu;
+ struct cpumask *cpus;
bool called;
+ int i, me;
- zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+ me = get_cpu();
- called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus);
+ cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
+ cpumask_clear(cpus);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu == except)
+ continue;
+ kvm_make_vcpu_request(kvm, vcpu, req, cpus, me);
+ }
+
+ called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
+ put_cpu();
- free_cpumask_var(cpus);
return called;
}
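For context, the broadcast wrapper (present elsewhere in the file and unchanged by this series) simply forwards with a NULL exception:

bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
	return kvm_make_all_cpus_request_except(kvm, req, NULL);
}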
@@ -312,11 +334,7 @@ EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- /*
- * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
- * kvm_make_all_cpus_request.
- */
- long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
+ ++kvm->stat.generic.remote_tlb_flush_requests;
/*
* We want to publish modifications to the page tables before reading
@@ -332,7 +350,6 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
if (!kvm_arch_flush_remote_tlb(kvm)
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.generic.remote_tlb_flush;
- cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
#endif
@@ -415,6 +432,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->preempted = false;
vcpu->ready = false;
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
+ vcpu->last_used_slot = 0;
}
void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -496,17 +514,6 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
idx = srcu_read_lock(&kvm->srcu);
- /* The on_lock() path does not yet support lock elision. */
- if (!IS_KVM_NULL_FN(range->on_lock)) {
- locked = true;
- KVM_MMU_LOCK(kvm);
-
- range->on_lock(kvm, range->start, range->end);
-
- if (IS_KVM_NULL_FN(range->handler))
- goto out_unlock;
- }
-
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(slot, slots) {
@@ -538,15 +545,18 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
if (!locked) {
locked = true;
KVM_MMU_LOCK(kvm);
+ if (!IS_KVM_NULL_FN(range->on_lock))
+ range->on_lock(kvm, range->start, range->end);
+ if (IS_KVM_NULL_FN(range->handler))
+ break;
}
ret |= range->handler(kvm, &gfn_range);
}
}
- if (range->flush_on_ret && (ret || kvm->tlbs_dirty))
+ if (range->flush_on_ret && ret)
kvm_flush_remote_tlbs(kvm);
-out_unlock:
if (locked)
KVM_MMU_UNLOCK(kvm);
@@ -604,16 +614,20 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
trace_kvm_set_spte_hva(address);
/*
- * .change_pte() must be surrounded by .invalidate_range_{start,end}(),
- * and so always runs with an elevated notifier count. This obviates
- * the need to bump the sequence count.
+ * .change_pte() must be surrounded by .invalidate_range_{start,end}().
+ * If mmu_notifier_count is zero, then no in-progress invalidations,
+ * including this one, found a relevant memslot at start(); rechecking
+ * memslots here is unnecessary. Note, a false positive (count elevated
+ * by a different invalidation) is sub-optimal but functionally ok.
*/
- WARN_ON_ONCE(!kvm->mmu_notifier_count);
+ WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
+ if (!READ_ONCE(kvm->mmu_notifier_count))
+ return;
kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
}
-static void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
+void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
unsigned long end)
{
/*
@@ -658,12 +672,24 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
trace_kvm_unmap_hva_range(range->start, range->end);
+ /*
+ * Prevent memslot modification between range_start() and range_end()
+ * so that conditionally locking provides the same result in both
+ * functions. Without that guarantee, the mmu_notifier_count
+ * adjustments will be imbalanced.
+ *
+ * Pairs with the decrement in range_end().
+ */
+ spin_lock(&kvm->mn_invalidate_lock);
+ kvm->mn_active_invalidate_count++;
+ spin_unlock(&kvm->mn_invalidate_lock);
+
__kvm_handle_hva_range(kvm, &hva_range);
return 0;
}
-static void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
+void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
unsigned long end)
{
/*
@@ -694,9 +720,22 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
.flush_on_ret = false,
.may_block = mmu_notifier_range_blockable(range),
};
+ bool wake;
__kvm_handle_hva_range(kvm, &hva_range);
+ /* Pairs with the increment in range_start(). */
+ spin_lock(&kvm->mn_invalidate_lock);
+ wake = (--kvm->mn_active_invalidate_count == 0);
+ spin_unlock(&kvm->mn_invalidate_lock);
+
+ /*
+ * There can only be one waiter, since the wait happens under
+ * slots_lock.
+ */
+ if (wake)
+ rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
+
BUG_ON(kvm->mmu_notifier_count < 0);
}
@@ -897,7 +936,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
char dir_name[ITOA_MAX_LEN * 2];
struct kvm_stat_data *stat_data;
const struct _kvm_stats_desc *pdesc;
- int i;
+ int i, ret;
int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
kvm_vcpu_stats_header.num_desc;
@@ -954,6 +993,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
kvm->debugfs_dentry, stat_data,
&stat_fops_per_vm);
}
+
+ ret = kvm_arch_create_vm_debugfs(kvm);
+ if (ret) {
+ kvm_destroy_vm_debugfs(kvm);
+ return ret;
+ }
+
return 0;
}
@@ -974,6 +1020,17 @@ void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
}
+/*
+ * Called after the per-VM debugfs is created. kvm->debugfs_dentry is set up
+ * by that point, so arch-specific debugfs entries can be created under it.
+ * Cleanup is handled automatically and recursively by kvm_destroy_vm_debugfs(),
+ * so a per-arch destroy interface is not needed.
+ */
+int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+ return 0;
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
struct kvm *kvm = kvm_arch_alloc_vm();
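A minimal sketch of how an architecture might implement the weak kvm_arch_create_vm_debugfs() hook introduced above; the file name, fops and choice of stat are hypothetical, and the only assumptions are the standard debugfs helpers plus the already-populated kvm->debugfs_dentry:

static int example_stat_get(void *data, u64 *val)
{
	struct kvm *kvm = data;

	*val = kvm->stat.generic.remote_tlb_flush;	/* any per-VM counter */
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(example_stat_fops, example_stat_get, NULL, "%llu\n");

int kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
	/* kvm->debugfs_dentry was set up by kvm_create_vm_debugfs(). */
	debugfs_create_file("example-stat", 0444, kvm->debugfs_dentry,
			    kvm, &example_stat_fops);
	return 0;
}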
@@ -991,6 +1048,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
mutex_init(&kvm->slots_arch_lock);
+ spin_lock_init(&kvm->mn_invalidate_lock);
+ rcuwait_init(&kvm->mn_memslots_update_rcuwait);
+
INIT_LIST_HEAD(&kvm->devices);
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@ -1113,6 +1173,16 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+ /*
+ * At this point, pending calls to invalidate_range_start()
+ * have completed but no more MMU notifiers will run, so
+ * mn_active_invalidate_count may remain unbalanced.
+ * No threads can be waiting in install_new_memslots as the
+ * last reference on KVM has been dropped, but freeing
+ * memslots would deadlock without this manual intervention.
+ */
+ WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
+ kvm->mn_active_invalidate_count = 0;
#else
kvm_arch_flush_shadow_all(kvm);
#endif
@@ -1134,6 +1204,16 @@ void kvm_get_kvm(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_get_kvm);
+/*
+ * A safe version of kvm_get_kvm() that checks the VM is not already being
+ * destroyed. Returns true if a reference was successfully taken, false otherwise.
+ */
+bool kvm_get_kvm_safe(struct kvm *kvm)
+{
+ return refcount_inc_not_zero(&kvm->users_count);
+}
+EXPORT_SYMBOL_GPL(kvm_get_kvm_safe);
+
void kvm_put_kvm(struct kvm *kvm)
{
if (refcount_dec_and_test(&kvm->users_count))
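A hedged usage sketch (the function is hypothetical): code that runs asynchronously with respect to VM teardown takes its reference via kvm_get_kvm_safe() and backs off if the VM is already being destroyed, just as the debugfs open path further down now does:

static int example_async_op(struct kvm *kvm)
{
	if (!kvm_get_kvm_safe(kvm))
		return -ENOENT;	/* users_count already reached zero */

	/* ... it is now safe to use kvm ... */

	kvm_put_kvm(kvm);
	return 0;
}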
@@ -1194,8 +1274,8 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
slots->used_slots--;
- if (atomic_read(&slots->lru_slot) >= slots->used_slots)
- atomic_set(&slots->lru_slot, 0);
+ if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
+ atomic_set(&slots->last_used_slot, 0);
for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
mslots[i] = mslots[i + 1];
@@ -1364,7 +1444,22 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
+ /*
+ * Do not store the new memslots while there are invalidations in
+ * progress, otherwise the locking in invalidate_range_start and
+ * invalidate_range_end will be unbalanced.
+ */
+ spin_lock(&kvm->mn_invalidate_lock);
+ prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait);
+ while (kvm->mn_active_invalidate_count) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&kvm->mn_invalidate_lock);
+ schedule();
+ spin_lock(&kvm->mn_invalidate_lock);
+ }
+ finish_rcuwait(&kvm->mn_memslots_update_rcuwait);
rcu_assign_pointer(kvm->memslots[as_id], slots);
+ spin_unlock(&kvm->mn_invalidate_lock);
/*
* Acquired in kvm_set_memslot. Must be released before synchronize
@@ -1980,7 +2075,26 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot);
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
+ struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+ struct kvm_memory_slot *slot;
+ int slot_index;
+
+ slot = try_get_memslot(slots, vcpu->last_used_slot, gfn);
+ if (slot)
+ return slot;
+
+ /*
+ * Fall back to searching all memslots. We purposely use
+ * search_memslots() instead of __gfn_to_memslot() to avoid
+ * thrashing the VM-wide last_used_index in kvm_memslots.
+ */
+ slot = search_memslots(slots, gfn, &slot_index);
+ if (slot) {
+ vcpu->last_used_slot = slot_index;
+ return slot;
+ }
+
+ return NULL;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
@@ -2239,7 +2353,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
* Get a reference here because callers of *hva_to_pfn* and
* *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
* returned pfn. This is only needed if the VMA has VM_MIXEDMAP
- * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+ * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
* simply do nothing for reserved pfns.
*
* Whoever called remap_pfn_range is also going to call e.g.
@@ -2636,13 +2750,6 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn)
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
-void kvm_get_pfn(kvm_pfn_t pfn)
-{
- if (!kvm_is_reserved_pfn(pfn))
- get_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_get_pfn);
-
static int next_segment(unsigned long len, int offset)
{
if (len > PAGE_SIZE - offset)
@@ -3053,15 +3160,19 @@ out:
static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
{
- unsigned int old, val, shrink;
+ unsigned int old, val, shrink, grow_start;
old = val = vcpu->halt_poll_ns;
shrink = READ_ONCE(halt_poll_ns_shrink);
+ grow_start = READ_ONCE(halt_poll_ns_grow_start);
if (shrink == 0)
val = 0;
else
val /= shrink;
+ if (val < grow_start)
+ val = 0;
+
vcpu->halt_poll_ns = val;
trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
}
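Worked example (values illustrative): with halt_poll_ns_shrink = 2 and halt_poll_ns_grow_start = 10000, a vCPU whose polling window is 16000 ns shrinks to 8000 ns; because that is below grow_start, the window is clamped to 0, disabling polling outright rather than leaving it at a size too small to be useful.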
@@ -3122,13 +3233,23 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
++vcpu->stat.generic.halt_successful_poll;
if (!vcpu_valid_wakeup(vcpu))
++vcpu->stat.generic.halt_poll_invalid;
+
+ KVM_STATS_LOG_HIST_UPDATE(
+ vcpu->stat.generic.halt_poll_success_hist,
+ ktime_to_ns(ktime_get()) -
+ ktime_to_ns(start));
goto out;
}
cpu_relax();
poll_end = cur = ktime_get();
} while (kvm_vcpu_can_poll(cur, stop));
+
+ KVM_STATS_LOG_HIST_UPDATE(
+ vcpu->stat.generic.halt_poll_fail_hist,
+ ktime_to_ns(ktime_get()) - ktime_to_ns(start));
}
+
prepare_to_rcuwait(&vcpu->wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -3141,6 +3262,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
}
finish_rcuwait(&vcpu->wait);
cur = ktime_get();
+ if (waited) {
+ vcpu->stat.generic.halt_wait_ns +=
+ ktime_to_ns(cur) - ktime_to_ns(poll_end);
+ KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_wait_hist,
+ ktime_to_ns(cur) - ktime_to_ns(poll_end));
+ }
out:
kvm_arch_vcpu_unblocking(vcpu);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
@@ -3193,16 +3320,24 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
*/
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
- int me;
- int cpu = vcpu->cpu;
+ int me, cpu;
if (kvm_vcpu_wake_up(vcpu))
return;
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point
+ * after kvm_arch_vcpu_should_kick(), which could result in sending an
+ * IPI to the previous pCPU. But, that's ok because the purpose of the
+ * IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
+ * vCPU also requires it to leave IN_GUEST_MODE.
+ */
me = get_cpu();
- if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
- if (kvm_arch_vcpu_should_kick(vcpu))
+ if (kvm_arch_vcpu_should_kick(vcpu)) {
+ cpu = READ_ONCE(vcpu->cpu);
+ if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
smp_send_reschedule(cpu);
+ }
put_cpu();
}
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
@@ -3396,7 +3531,7 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = {
static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
struct kvm_vcpu *vcpu = file->private_data;
- unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long pages = vma_pages(vma);
if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) ||
kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) &&
@@ -3460,7 +3595,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
struct kvm_vcpu *vcpu;
struct page *page;
- if (id >= KVM_MAX_VCPU_ID)
+ if (id >= KVM_MAX_VCPU_IDS)
return -EINVAL;
mutex_lock(&kvm->lock);
@@ -3612,7 +3747,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
struct kvm_fpu *fpu = NULL;
struct kvm_sregs *kvm_sregs = NULL;
- if (vcpu->kvm->mm != current->mm)
+ if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
return -EIO;
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
@@ -3822,7 +3957,7 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
void __user *argp = compat_ptr(arg);
int r;
- if (vcpu->kvm->mm != current->mm)
+ if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
return -EIO;
switch (ioctl) {
@@ -3888,7 +4023,7 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
{
struct kvm_device *dev = filp->private_data;
- if (dev->kvm->mm != current->mm)
+ if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
return -EIO;
switch (ioctl) {
@@ -4210,7 +4345,7 @@ static long kvm_vm_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int r;
- if (kvm->mm != current->mm)
+ if (kvm->mm != current->mm || kvm->vm_bugged)
return -EIO;
switch (ioctl) {
case KVM_CREATE_VCPU:
@@ -4421,7 +4556,7 @@ static long kvm_vm_compat_ioctl(struct file *filp,
struct kvm *kvm = filp->private_data;
int r;
- if (kvm->mm != current->mm)
+ if (kvm->mm != current->mm || kvm->vm_bugged)
return -EIO;
switch (ioctl) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
@@ -4983,12 +5118,12 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
inode->i_private;
- /* The debugfs files are a reference to the kvm struct which
- * is still valid when kvm_destroy_vm is called.
- * To avoid the race between open and the removal of the debugfs
- * directory we test against the users count.
+ /*
+ * The debugfs files hold a reference to the kvm struct, which is
+ * still valid when kvm_destroy_vm is called. kvm_get_kvm_safe()
+ * avoids the race between open and the removal of the debugfs directory.
*/
- if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
+ if (!kvm_get_kvm_safe(stat_data->kvm))
return -ENOENT;
if (simple_attr_open(inode, file, get,
@@ -5416,9 +5551,17 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_free_3;
}
+ for_each_possible_cpu(cpu) {
+ if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
+ GFP_KERNEL, cpu_to_node(cpu))) {
+ r = -ENOMEM;
+ goto out_free_4;
+ }
+ }
+
r = kvm_async_pf_init();
if (r)
- goto out_free;
+ goto out_free_5;
kvm_chardev_ops.owner = module;
kvm_vm_fops.owner = module;
@@ -5444,7 +5587,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
out_unreg:
kvm_async_pf_deinit();
-out_free:
+out_free_5:
+ for_each_possible_cpu(cpu)
+ free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
+out_free_4:
kmem_cache_destroy(kvm_vcpu_cache);
out_free_3:
unregister_reboot_notifier(&kvm_reboot_notifier);
@@ -5464,8 +5610,12 @@ EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{
+ int cpu;
+
debugfs_remove_recursive(kvm_debugfs_dir);
misc_deregister(&kvm_dev);
+ for_each_possible_cpu(cpu)
+ free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
kvm_async_pf_deinit();
unregister_syscore_ops(&kvm_syscore_ops);