Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--   virt/kvm/kvm_main.c | 369
 1 file changed, 241 insertions(+), 128 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a49df8988cd6..584a5bab3af3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -168,7 +168,7 @@ __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
 {
 }
 
-bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+bool kvm_is_zone_device_page(struct page *page)
 {
         /*
          * The metadata used by is_zone_device_page() to determine whether or
@@ -176,25 +176,42 @@ bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
          * the device has been pinned, e.g. by get_user_pages(). WARN if the
          * page_count() is zero to help detect bad usage of this helper.
          */
-        if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+        if (WARN_ON_ONCE(!page_count(page)))
                 return false;
 
-        return is_zone_device_page(pfn_to_page(pfn));
+        return is_zone_device_page(page);
 }
 
-bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
+/*
+ * Returns a 'struct page' if the pfn is "valid" and backed by a refcounted
+ * page, NULL otherwise. Note, the list of refcounted PG_reserved page types
+ * is likely incomplete, it has been compiled purely through people wanting to
+ * back guest with a certain type of memory and encountering issues.
+ */
+struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn)
 {
+        struct page *page;
+
+        if (!pfn_valid(pfn))
+                return NULL;
+
+        page = pfn_to_page(pfn);
+        if (!PageReserved(page))
+                return page;
+
+        /* The ZERO_PAGE(s) is marked PG_reserved, but is refcounted. */
+        if (is_zero_pfn(pfn))
+                return page;
+
         /*
          * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
          * perspective they are "normal" pages, albeit with slightly different
          * usage rules.
          */
-        if (pfn_valid(pfn))
-                return PageReserved(pfn_to_page(pfn)) &&
-                       !is_zero_pfn(pfn) &&
-                       !kvm_is_zone_device_pfn(pfn);
+        if (kvm_is_zone_device_page(page))
+                return page;
 
-        return true;
+        return NULL;
 }
 
 /*
@@ -239,7 +256,7 @@ static bool kvm_request_needs_ipi(struct kvm_vcpu *vcpu, unsigned req)
         return mode == IN_GUEST_MODE;
 }
 
-static void ack_flush(void *_completed)
+static void ack_kick(void *_completed)
 {
 }
 
@@ -248,7 +265,7 @@ static inline bool kvm_kick_many_cpus(struct cpumask *cpus, bool wait)
         if (cpumask_empty(cpus))
                 return false;
 
-        smp_call_function_many(cpus, ack_flush, NULL, wait);
+        smp_call_function_many(cpus, ack_kick, NULL, wait);
         return true;
 }
 
@@ -379,14 +396,31 @@ static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
                 return (void *)__get_free_page(gfp_flags);
 }
 
-int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
+int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
 {
+        gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;
         void *obj;
 
         if (mc->nobjs >= min)
                 return 0;
-        while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
-                obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
+
+        if (unlikely(!mc->objects)) {
+                if (WARN_ON_ONCE(!capacity))
+                        return -EIO;
+
+                mc->objects = kvmalloc_array(sizeof(void *), capacity, gfp);
+                if (!mc->objects)
+                        return -ENOMEM;
+
+                mc->capacity = capacity;
+        }
+
+        /* It is illegal to request a different capacity across topups. */
+        if (WARN_ON_ONCE(mc->capacity != capacity))
+                return -EIO;
+
+        while (mc->nobjs < mc->capacity) {
+                obj = mmu_memory_cache_alloc_obj(mc, gfp);
                 if (!obj)
                         return mc->nobjs >= min ? 0 : -ENOMEM;
                 mc->objects[mc->nobjs++] = obj;
@@ -394,6 +428,11 @@ int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
         return 0;
 }
 
+int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
+{
+        return __kvm_mmu_topup_memory_cache(mc, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE, min);
+}
+
 int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc)
 {
         return mc->nobjs;
@@ -407,6 +446,11 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
                 else
                         free_page((unsigned long)mc->objects[--mc->nobjs]);
         }
+
+        kvfree(mc->objects);
+
+        mc->objects = NULL;
+        mc->capacity = 0;
 }
 
 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
@@ -440,6 +484,10 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
         vcpu->ready = false;
         preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
         vcpu->last_used_slot = NULL;
+
+        /* Fill the stats id string for the vcpu */
+        snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
+                 task_pid_nr(current), id);
 }
 
 static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -654,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 
         /*
          * .change_pte() must be surrounded by .invalidate_range_{start,end}().
-         * If mmu_notifier_count is zero, then no in-progress invalidations,
-         * including this one, found a relevant memslot at start(); rechecking
-         * memslots here is unnecessary. Note, a false positive (count elevated
-         * by a different invalidation) is sub-optimal but functionally ok.
+         * If mmu_invalidate_in_progress is zero, then no in-progress
+         * invalidations, including this one, found a relevant memslot at
+         * start(); rechecking memslots here is unnecessary. Note, a false
+         * positive (count elevated by a different invalidation) is sub-optimal
+         * but functionally ok.
          */
         WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
-        if (!READ_ONCE(kvm->mmu_notifier_count))
+        if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                 return;
 
         kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
 }
 
-void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
-                            unsigned long end)
+void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
+                              unsigned long end)
 {
         /*
          * The count increase must become visible at unlock time as no
          * spte can be established without taking the mmu_lock and
          * count is also read inside the mmu_lock critical section.
          */
-        kvm->mmu_notifier_count++;
-        if (likely(kvm->mmu_notifier_count == 1)) {
-                kvm->mmu_notifier_range_start = start;
-                kvm->mmu_notifier_range_end = end;
+        kvm->mmu_invalidate_in_progress++;
+        if (likely(kvm->mmu_invalidate_in_progress == 1)) {
+                kvm->mmu_invalidate_range_start = start;
+                kvm->mmu_invalidate_range_end = end;
         } else {
                 /*
                  * Fully tracking multiple concurrent ranges has diminishing
@@ -688,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
                  * accumulate and persist until all outstanding invalidates
                  * complete.
                  */
-                kvm->mmu_notifier_range_start =
-                        min(kvm->mmu_notifier_range_start, start);
-                kvm->mmu_notifier_range_end =
-                        max(kvm->mmu_notifier_range_end, end);
+                kvm->mmu_invalidate_range_start =
+                        min(kvm->mmu_invalidate_range_start, start);
+                kvm->mmu_invalidate_range_end =
+                        max(kvm->mmu_invalidate_range_end, end);
         }
 }
 
@@ -704,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                 .end            = range->end,
                 .pte            = __pte(0),
                 .handler        = kvm_unmap_gfn_range,
-                .on_lock        = kvm_inc_notifier_count,
+                .on_lock        = kvm_mmu_invalidate_begin,
                 .on_unlock      = kvm_arch_guest_memory_reclaimed,
                 .flush_on_ret   = true,
                 .may_block      = mmu_notifier_range_blockable(range),
@@ -715,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
         /*
          * Prevent memslot modification between range_start() and range_end()
          * so that conditionally locking provides the same result in both
-         * functions. Without that guarantee, the mmu_notifier_count
+         * functions. Without that guarantee, the mmu_invalidate_in_progress
          * adjustments will be imbalanced.
          *
         * Pairs with the decrement in range_end().
@@ -724,6 +773,16 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
         kvm->mn_active_invalidate_count++;
         spin_unlock(&kvm->mn_invalidate_lock);
 
+        /*
+         * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
+         * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
+         * each cache's lock. There are relatively few caches in existence at
+         * any given time, and the caches themselves can check for hva overlap,
+         * i.e. don't need to rely on memslot overlap checks for performance.
+         * Because this runs without holding mmu_lock, the pfn caches must use
+         * mn_active_invalidate_count (see above) instead of
+         * mmu_invalidate_in_progress.
+         */
         gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
                                           hva_range.may_block);
 
@@ -732,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
         return 0;
 }
 
-void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
-                            unsigned long end)
+void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
+                            unsigned long end)
 {
         /*
          * This sequence increase will notify the kvm page fault that
          * the page that is going to be mapped in the spte could have
          * been freed.
         */
-        kvm->mmu_notifier_seq++;
+        kvm->mmu_invalidate_seq++;
         smp_wmb();
         /*
          * The above sequence increase must be visible before the
          * below count decrease, which is ensured by the smp_wmb above
-         * in conjunction with the smp_rmb in mmu_notifier_retry().
+         * in conjunction with the smp_rmb in mmu_invalidate_retry().
          */
-        kvm->mmu_notifier_count--;
+        kvm->mmu_invalidate_in_progress--;
 }
 
 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -759,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
                 .end            = range->end,
                 .pte            = __pte(0),
                 .handler        = (void *)kvm_null_fn,
-                .on_lock        = kvm_dec_notifier_count,
+                .on_lock        = kvm_mmu_invalidate_end,
                 .on_unlock      = (void *)kvm_null_fn,
                 .flush_on_ret   = false,
                 .may_block      = mmu_notifier_range_blockable(range),
@@ -780,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
         if (wake)
                 rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
 
-        BUG_ON(kvm->mmu_notifier_count < 0);
+        BUG_ON(kvm->mmu_invalidate_in_progress < 0);
 }
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
@@ -964,21 +1023,21 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
         }
 }
 
-static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
+static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
 {
         static DEFINE_MUTEX(kvm_debugfs_lock);
         struct dentry *dent;
         char dir_name[ITOA_MAX_LEN * 2];
         struct kvm_stat_data *stat_data;
         const struct _kvm_stats_desc *pdesc;
-        int i, ret;
+        int i, ret = -ENOMEM;
         int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
                                       kvm_vcpu_stats_header.num_desc;
 
         if (!debugfs_initialized())
                 return 0;
 
-        snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
+        snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname);
         mutex_lock(&kvm_debugfs_lock);
         dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
         if (dent) {
@@ -997,13 +1056,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
                                          sizeof(*kvm->debugfs_stat_data),
                                          GFP_KERNEL_ACCOUNT);
         if (!kvm->debugfs_stat_data)
-                return -ENOMEM;
+                goto out_err;
 
         for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
                 pdesc = &kvm_vm_stats_desc[i];
                 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
                 if (!stat_data)
-                        return -ENOMEM;
+                        goto out_err;
 
                 stat_data->kvm = kvm;
                 stat_data->desc = pdesc;
@@ -1018,7 +1077,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
                 pdesc = &kvm_vcpu_stats_desc[i];
                 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
                 if (!stat_data)
-                        return -ENOMEM;
+                        goto out_err;
 
                 stat_data->kvm = kvm;
                 stat_data->desc = pdesc;
@@ -1030,12 +1089,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
         }
 
         ret = kvm_arch_create_vm_debugfs(kvm);
-        if (ret) {
-                kvm_destroy_vm_debugfs(kvm);
-                return i;
-        }
+        if (ret)
+                goto out_err;
 
         return 0;
+out_err:
+        kvm_destroy_vm_debugfs(kvm);
+        return ret;
 }
 
 /*
@@ -1066,7 +1126,7 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
         return 0;
 }
 
-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 {
         struct kvm *kvm = kvm_arch_alloc_vm();
         struct kvm_memslots *slots;
@@ -1076,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
         if (!kvm)
                 return ERR_PTR(-ENOMEM);
 
+        /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */
+        __module_get(kvm_chardev_ops.owner);
+
         KVM_MMU_LOCK_INIT(kvm);
         mmgrab(current->mm);
         kvm->mm = current->mm;
@@ -1102,6 +1165,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
          */
         kvm->debugfs_dentry = ERR_PTR(-ENOENT);
 
+        snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d",
+                 task_pid_nr(current));
+
         if (init_srcu_struct(&kvm->srcu))
                 goto out_err_no_srcu;
         if (init_srcu_struct(&kvm->irq_srcu))
@@ -1150,6 +1216,14 @@ static struct kvm *kvm_create_vm(unsigned long type)
         if (r)
                 goto out_err_no_mmu_notifier;
 
+        r = kvm_coalesced_mmio_init(kvm);
+        if (r < 0)
+                goto out_no_coalesced_mmio;
+
+        r = kvm_create_vm_debugfs(kvm, fdname);
+        if (r)
+                goto out_err_no_debugfs;
+
         r = kvm_arch_post_init_vm(kvm);
         if (r)
                 goto out_err;
@@ -1161,19 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
         preempt_notifier_inc();
         kvm_init_pm_notifier(kvm);
 
-        /*
-         * When the fd passed to this ioctl() is opened it pins the module,
-         * but try_module_get() also prevents getting a reference if the module
-         * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait").
-         */
-        if (!try_module_get(kvm_chardev_ops.owner)) {
-                r = -ENODEV;
-                goto out_err;
-        }
-
         return kvm;
 
 out_err:
+        kvm_destroy_vm_debugfs(kvm);
+out_err_no_debugfs:
+        kvm_coalesced_mmio_free(kvm);
+out_no_coalesced_mmio:
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
         if (kvm->mmu_notifier.ops)
                 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
@@ -1192,6 +1260,7 @@ out_err_no_irq_srcu:
 out_err_no_srcu:
         kvm_arch_free_vm(kvm);
         mmdrop(current->mm);
+        module_put(kvm_chardev_ops.owner);
         return ERR_PTR(r);
 }
 
@@ -2449,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 {
         unsigned int flags = FOLL_HWPOISON;
         struct page *page;
-        int npages = 0;
+        int npages;
 
         might_sleep();
 
@@ -2492,9 +2561,12 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
 
 static int kvm_try_get_pfn(kvm_pfn_t pfn)
 {
-        if (kvm_is_reserved_pfn(pfn))
+        struct page *page = kvm_pfn_to_refcounted_page(pfn);
+
+        if (!page)
                 return 1;
-        return get_page_unless_zero(pfn_to_page(pfn));
+
+        return get_page_unless_zero(page);
 }
 
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
@@ -2580,7 +2652,7 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
                      bool write_fault, bool *writable)
 {
         struct vm_area_struct *vma;
-        kvm_pfn_t pfn = 0;
+        kvm_pfn_t pfn;
         int npages, r;
 
         /* we can do it either atomically or asynchronously, not both */
@@ -2711,34 +2783,32 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
 }
 EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
 
-static struct page *kvm_pfn_to_page(kvm_pfn_t pfn)
-{
-        if (is_error_noslot_pfn(pfn))
-                return KVM_ERR_PTR_BAD_PAGE;
-
-        if (kvm_is_reserved_pfn(pfn)) {
-                WARN_ON(1);
-                return KVM_ERR_PTR_BAD_PAGE;
-        }
-
-        return pfn_to_page(pfn);
-}
-
+/*
+ * Do not use this helper unless you are absolutely certain the gfn _must_ be
+ * backed by 'struct page'. A valid example is if the backing memslot is
+ * controlled by KVM. Note, if the returned page is valid, it's refcount has
+ * been elevated by gfn_to_pfn().
+ */
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
+        struct page *page;
         kvm_pfn_t pfn;
 
         pfn = gfn_to_pfn(kvm, gfn);
 
-        return kvm_pfn_to_page(pfn);
+        if (is_error_noslot_pfn(pfn))
+                return KVM_ERR_PTR_BAD_PAGE;
+
+        page = kvm_pfn_to_refcounted_page(pfn);
+        if (!page)
+                return KVM_ERR_PTR_BAD_PAGE;
+
+        return page;
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_pfn(kvm_pfn_t pfn, bool dirty)
 {
-        if (pfn == 0)
-                return;
-
         if (dirty)
                 kvm_release_pfn_dirty(pfn);
         else
@@ -2804,28 +2874,48 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 
-struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool kvm_is_ad_tracked_page(struct page *page)
 {
-        kvm_pfn_t pfn;
+        /*
+         * Per page-flags.h, pages tagged PG_reserved "should in general not be
+         * touched (e.g. set dirty) except by its owner".
+         */
+        return !PageReserved(page);
+}
 
-        pfn = kvm_vcpu_gfn_to_pfn(vcpu, gfn);
+static void kvm_set_page_dirty(struct page *page)
+{
+        if (kvm_is_ad_tracked_page(page))
+                SetPageDirty(page);
+}
 
-        return kvm_pfn_to_page(pfn);
+static void kvm_set_page_accessed(struct page *page)
+{
+        if (kvm_is_ad_tracked_page(page))
+                mark_page_accessed(page);
 }
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
         WARN_ON(is_error_page(page));
 
-        kvm_release_pfn_clean(page_to_pfn(page));
+        kvm_set_page_accessed(page);
+        put_page(page);
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn)
 {
-        if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
-                put_page(pfn_to_page(pfn));
+        struct page *page;
+
+        if (is_error_noslot_pfn(pfn))
+                return;
+
+        page = kvm_pfn_to_refcounted_page(pfn);
+        if (!page)
+                return;
+
+        kvm_release_page_clean(page);
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
 
@@ -2833,28 +2923,48 @@ void kvm_release_page_dirty(struct page *page)
 {
         WARN_ON(is_error_page(page));
 
-        kvm_release_pfn_dirty(page_to_pfn(page));
+        kvm_set_page_dirty(page);
+        kvm_release_page_clean(page);
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
 
 void kvm_release_pfn_dirty(kvm_pfn_t pfn)
 {
-        kvm_set_pfn_dirty(pfn);
-        kvm_release_pfn_clean(pfn);
+        struct page *page;
+
+        if (is_error_noslot_pfn(pfn))
+                return;
+
+        page = kvm_pfn_to_refcounted_page(pfn);
+        if (!page)
+                return;
+
+        kvm_release_page_dirty(page);
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
 
+/*
+ * Note, checking for an error/noslot pfn is the caller's responsibility when
+ * directly marking a page dirty/accessed. Unlike the "release" helpers, the
+ * "set" helpers are not to be used when the pfn might point at garbage.
+ */
 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
-        if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
-                SetPageDirty(pfn_to_page(pfn));
+        if (WARN_ON(is_error_noslot_pfn(pfn)))
+                return;
+
+        if (pfn_valid(pfn))
+                kvm_set_page_dirty(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 {
-        if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
-                mark_page_accessed(pfn_to_page(pfn));
+        if (WARN_ON(is_error_noslot_pfn(pfn)))
+                return;
+
+        if (pfn_valid(pfn))
+                kvm_set_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
@@ -3728,9 +3838,18 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
         return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
 }
 
+#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
+static int vcpu_get_pid(void *data, u64 *val)
+{
+        struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
+        *val = pid_nr(rcu_access_pointer(vcpu->pid));
+        return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_get_pid_fops, vcpu_get_pid, NULL, "%llu\n");
+
 static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 {
-#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
         struct dentry *debugfs_dentry;
         char dir_name[ITOA_MAX_LEN * 2];
 
@@ -3740,10 +3859,12 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
         snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
         debugfs_dentry = debugfs_create_dir(dir_name,
                                             vcpu->kvm->debugfs_dentry);
+        debugfs_create_file("pid", 0444, debugfs_dentry, vcpu,
+                            &vcpu_get_pid_fops);
 
         kvm_arch_create_vcpu_debugfs(vcpu, debugfs_dentry);
-#endif
 }
+#endif
 
 /*
  * Creates some virtual cpus. Good luck creating more than one.
@@ -3763,13 +3884,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
                 return -EINVAL;
         }
 
+        r = kvm_arch_vcpu_precreate(kvm, id);
+        if (r) {
+                mutex_unlock(&kvm->lock);
+                return r;
+        }
+
         kvm->created_vcpus++;
         mutex_unlock(&kvm->lock);
 
-        r = kvm_arch_vcpu_precreate(kvm, id);
-        if (r)
-                goto vcpu_decrement;
-
         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
         if (!vcpu) {
                 r = -ENOMEM;
@@ -3809,10 +3932,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
         if (r)
                 goto unlock_vcpu_destroy;
 
-        /* Fill the stats id string for the vcpu */
-        snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
-                 task_pid_nr(current), id);
-
         /* Now it's all set up, let userspace reach it */
         kvm_get_kvm(kvm);
         r = create_vcpu_fd(vcpu);
@@ -4261,7 +4380,7 @@ void kvm_unregister_device_ops(u32 type)
 static int kvm_ioctl_create_device(struct kvm *kvm,
                                    struct kvm_create_device *cd)
 {
-        const struct kvm_device_ops *ops = NULL;
+        const struct kvm_device_ops *ops;
         struct kvm_device *dev;
         bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
         int type;
@@ -4779,28 +4898,25 @@ EXPORT_SYMBOL_GPL(file_is_kvm);
 
 static int kvm_dev_ioctl_create_vm(unsigned long type)
 {
-        int r;
+        char fdname[ITOA_MAX_LEN + 1];
+        int r, fd;
         struct kvm *kvm;
         struct file *file;
 
-        kvm = kvm_create_vm(type);
-        if (IS_ERR(kvm))
-                return PTR_ERR(kvm);
-#ifdef CONFIG_KVM_MMIO
-        r = kvm_coalesced_mmio_init(kvm);
-        if (r < 0)
-                goto put_kvm;
-#endif
-        r = get_unused_fd_flags(O_CLOEXEC);
-        if (r < 0)
-                goto put_kvm;
+        fd = get_unused_fd_flags(O_CLOEXEC);
+        if (fd < 0)
+                return fd;
 
-        snprintf(kvm->stats_id, sizeof(kvm->stats_id),
-                        "kvm-%d", task_pid_nr(current));
+        snprintf(fdname, sizeof(fdname), "%d", fd);
+
+        kvm = kvm_create_vm(type, fdname);
+        if (IS_ERR(kvm)) {
+                r = PTR_ERR(kvm);
+                goto put_fd;
+        }
 
         file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
         if (IS_ERR(file)) {
-                put_unused_fd(r);
                 r = PTR_ERR(file);
                 goto put_kvm;
         }
@@ -4811,18 +4927,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
          * cases it will be called by the final fput(file) and will take
          * care of doing kvm_put_kvm(kvm).
          */
-        if (kvm_create_vm_debugfs(kvm, r) < 0) {
-                put_unused_fd(r);
-                fput(file);
-                return -ENOMEM;
-        }
         kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
 
-        fd_install(r, file);
-        return r;
+        fd_install(fd, file);
+        return fd;
 
 put_kvm:
         kvm_put_kvm(kvm);
+put_fd:
+        put_unused_fd(fd);
         return r;
 }
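
Reading note (not part of the patch): the reworked memory-cache API above is easiest to follow as a call pattern. The first topup now sizes the cache (the objects array is kvmalloc_array()'d instead of being a fixed-size array), later topups must pass the same capacity, and freeing the cache also releases that array. A minimal sketch of a hypothetical arch-side caller follows; the example_* names, the capacity of 512, and the min of 16 are illustrative only and do not come from this commit.

/* Hypothetical arch-side usage of the capacity-aware cache API (sketch only). */
#include <linux/kvm_host.h>
#include <linux/kvm_types.h>

#define EXAMPLE_CACHE_CAPACITY  512     /* illustrative; must match on every topup */

static struct kvm_mmu_memory_cache example_cache = {
        .gfp_zero = __GFP_ZERO,         /* hand out pre-zeroed objects */
};

static int example_topup(void)
{
        /*
         * Called outside mmu_lock. The first topup allocates the objects
         * array via kvmalloc_array() and records the capacity; later calls
         * only refill up to that capacity (a different capacity is rejected
         * with -EIO, per the WARN_ON_ONCE in the patch).
         */
        return __kvm_mmu_topup_memory_cache(&example_cache,
                                            EXAMPLE_CACHE_CAPACITY, 16);
}

static void *example_alloc(void)
{
        /* Under mmu_lock: consumes an object pre-allocated by the topup. */
        return kvm_mmu_memory_cache_alloc(&example_cache);
}

static void example_teardown(void)
{
        /* Now also kvfree()s the dynamically allocated objects array. */
        kvm_mmu_free_memory_cache(&example_cache);
}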