Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c | 663
1 file changed, 443 insertions(+), 220 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70f03ce0e5c1..74bdb7bf3295 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -149,8 +149,6 @@ static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);  __visible bool kvm_rebooting;  EXPORT_SYMBOL_GPL(kvm_rebooting); -static bool largepages_enabled = true; -  #define KVM_EVENT_CREATE_VM 0  #define KVM_EVENT_DESTROY_VM 1  static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); @@ -566,7 +564,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)  		return NULL;  	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) -		slots->id_to_index[i] = slots->memslots[i].id = i; +		slots->id_to_index[i] = -1;  	return slots;  } @@ -580,18 +578,14 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)  	memslot->dirty_bitmap = NULL;  } -/* - * Free any memory in @free but not in @dont. - */ -static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, -			      struct kvm_memory_slot *dont) +static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)  { -	if (!dont || free->dirty_bitmap != dont->dirty_bitmap) -		kvm_destroy_dirty_bitmap(free); +	kvm_destroy_dirty_bitmap(slot); -	kvm_arch_free_memslot(kvm, free, dont); +	kvm_arch_free_memslot(kvm, slot); -	free->npages = 0; +	slot->flags = 0; +	slot->npages = 0;  }  static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) @@ -602,7 +596,7 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)  		return;  	kvm_for_each_memslot(memslot, slots) -		kvm_free_memslot(kvm, memslot, NULL); +		kvm_free_memslot(kvm, memslot);  	kvfree(slots);  } @@ -860,9 +854,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)  /*   * Allocation size is twice as large as the actual dirty bitmap size. - * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed. + * See kvm_vm_ioctl_get_dirty_log() why this is needed.   */ -static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) +static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)  {  	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); @@ -874,63 +868,165 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)  }  /* - * Insert memslot and re-sort memslots based on their GFN, - * so binary search could be used to lookup GFN. - * Sorting algorithm takes advantage of having initially - * sorted array and known changed memslot position. + * Delete a memslot by decrementing the number of used slots and shifting all + * other entries in the array forward one spot.   
*/ -static void update_memslots(struct kvm_memslots *slots, -			    struct kvm_memory_slot *new, -			    enum kvm_mr_change change) +static inline void kvm_memslot_delete(struct kvm_memslots *slots, +				      struct kvm_memory_slot *memslot)  { -	int id = new->id; -	int i = slots->id_to_index[id];  	struct kvm_memory_slot *mslots = slots->memslots; +	int i; -	WARN_ON(mslots[i].id != id); -	switch (change) { -	case KVM_MR_CREATE: -		slots->used_slots++; -		WARN_ON(mslots[i].npages || !new->npages); -		break; -	case KVM_MR_DELETE: -		slots->used_slots--; -		WARN_ON(new->npages || !mslots[i].npages); -		break; -	default: -		break; -	} +	if (WARN_ON(slots->id_to_index[memslot->id] == -1)) +		return; -	while (i < KVM_MEM_SLOTS_NUM - 1 && -	       new->base_gfn <= mslots[i + 1].base_gfn) { -		if (!mslots[i + 1].npages) -			break; +	slots->used_slots--; + +	if (atomic_read(&slots->lru_slot) >= slots->used_slots) +		atomic_set(&slots->lru_slot, 0); + +	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {  		mslots[i] = mslots[i + 1];  		slots->id_to_index[mslots[i].id] = i; -		i++;  	} +	mslots[i] = *memslot; +	slots->id_to_index[memslot->id] = -1; +} + +/* + * "Insert" a new memslot by incrementing the number of used slots.  Returns + * the new slot's initial index into the memslots array. + */ +static inline int kvm_memslot_insert_back(struct kvm_memslots *slots) +{ +	return slots->used_slots++; +} + +/* + * Move a changed memslot backwards in the array by shifting existing slots + * with a higher GFN toward the front of the array.  Note, the changed memslot + * itself is not preserved in the array, i.e. not swapped at this time, only + * its new index into the array is tracked.  Returns the changed memslot's + * current index into the memslots array. + */ +static inline int kvm_memslot_move_backward(struct kvm_memslots *slots, +					    struct kvm_memory_slot *memslot) +{ +	struct kvm_memory_slot *mslots = slots->memslots; +	int i; + +	if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) || +	    WARN_ON_ONCE(!slots->used_slots)) +		return -1;  	/* -	 * The ">=" is needed when creating a slot with base_gfn == 0, -	 * so that it moves before all those with base_gfn == npages == 0. -	 * -	 * On the other hand, if new->npages is zero, the above loop has -	 * already left i pointing to the beginning of the empty part of -	 * mslots, and the ">=" would move the hole backwards in this -	 * case---which is wrong.  So skip the loop when deleting a slot. +	 * Move the target memslot backward in the array by shifting existing +	 * memslots with a higher GFN (than the target memslot) towards the +	 * front of the array.  	 */ -	if (new->npages) { -		while (i > 0 && -		       new->base_gfn >= mslots[i - 1].base_gfn) { -			mslots[i] = mslots[i - 1]; -			slots->id_to_index[mslots[i].id] = i; -			i--; -		} -	} else -		WARN_ON_ONCE(i != slots->used_slots); +	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) { +		if (memslot->base_gfn > mslots[i + 1].base_gfn) +			break; + +		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn); + +		/* Shift the next memslot forward one and update its index. */ +		mslots[i] = mslots[i + 1]; +		slots->id_to_index[mslots[i].id] = i; +	} +	return i; +} + +/* + * Move a changed memslot forwards in the array by shifting existing slots with + * a lower GFN toward the back of the array.  Note, the changed memslot itself + * is not preserved in the array, i.e. 
not swapped at this time, only its new + * index into the array is tracked.  Returns the changed memslot's final index + * into the memslots array. + */ +static inline int kvm_memslot_move_forward(struct kvm_memslots *slots, +					   struct kvm_memory_slot *memslot, +					   int start) +{ +	struct kvm_memory_slot *mslots = slots->memslots; +	int i; + +	for (i = start; i > 0; i--) { +		if (memslot->base_gfn < mslots[i - 1].base_gfn) +			break; + +		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn); + +		/* Shift the next memslot back one and update its index. */ +		mslots[i] = mslots[i - 1]; +		slots->id_to_index[mslots[i].id] = i; +	} +	return i; +} + +/* + * Re-sort memslots based on their GFN to account for an added, deleted, or + * moved memslot.  Sorting memslots by GFN allows using a binary search during + * memslot lookup. + * + * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!  I.e. the entry + * at memslots[0] has the highest GFN. + * + * The sorting algorithm takes advantage of having initially sorted memslots + * and knowing the position of the changed memslot.  Sorting is also optimized + * by not swapping the updated memslot and instead only shifting other memslots + * and tracking the new index for the updated memslot.  Only once its final + * index is known is the updated memslot copied into its position in the array. + * + *  - When deleting a memslot, the deleted memslot simply needs to be moved to + *    the end of the array. + * + *  - When creating a memslot, the algorithm "inserts" the new memslot at the + *    end of the array and then moves it forward to its correct location. + * + *  - When moving a memslot, the algorithm first moves the updated memslot + *    backward to handle the scenario where the memslot's GFN was changed to a + *    lower value.  update_memslots() then falls through and runs the same flow + *    as creating a memslot to move the memslot forward to handle the scenario + *    where its GFN was changed to a higher value. + * + * Note, slots are sorted from highest->lowest instead of lowest->highest for + * historical reasons.  Originally, invalid memslots were denoted by having + * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots + * to the end of the array.  The current algorithm uses dedicated logic to + * delete a memslot and thus does not rely on invalid memslots having GFN=0. + * + * The other historical motivation for highest->lowest was to improve the + * performance of memslot lookup.  KVM originally used a linear search starting + * at memslots[0].  On x86, the largest memslot usually has one of the highest, + * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a + * single memslot above the 4gb boundary.  As the largest memslot is also the + * most likely to be referenced, sorting it to the front of the array was + * advantageous.  The current binary search starts from the middle of the array + * and uses an LRU pointer to improve performance for all memslots and GFNs. 
+ */ +static void update_memslots(struct kvm_memslots *slots, +			    struct kvm_memory_slot *memslot, +			    enum kvm_mr_change change) +{ +	int i; + +	if (change == KVM_MR_DELETE) { +		kvm_memslot_delete(slots, memslot); +	} else { +		if (change == KVM_MR_CREATE) +			i = kvm_memslot_insert_back(slots); +		else +			i = kvm_memslot_move_backward(slots, memslot); +		i = kvm_memslot_move_forward(slots, memslot, i); -	mslots[i] = *new; -	slots->id_to_index[mslots[i].id] = i; +		/* +		 * Copy the memslot to its new position in memslots and update +		 * its index accordingly. +		 */ +		slots->memslots[i] = *memslot; +		slots->id_to_index[memslot->id] = i; +	}  }  static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) @@ -984,6 +1080,112 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,  }  /* + * Note, at a minimum, the current number of used slots must be allocated, even + * when deleting a memslot, as we need a complete duplicate of the memslots for + * use when invalidating a memslot prior to deleting/moving the memslot. + */ +static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old, +					     enum kvm_mr_change change) +{ +	struct kvm_memslots *slots; +	size_t old_size, new_size; + +	old_size = sizeof(struct kvm_memslots) + +		   (sizeof(struct kvm_memory_slot) * old->used_slots); + +	if (change == KVM_MR_CREATE) +		new_size = old_size + sizeof(struct kvm_memory_slot); +	else +		new_size = old_size; + +	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT); +	if (likely(slots)) +		memcpy(slots, old, old_size); + +	return slots; +} + +static int kvm_set_memslot(struct kvm *kvm, +			   const struct kvm_userspace_memory_region *mem, +			   struct kvm_memory_slot *old, +			   struct kvm_memory_slot *new, int as_id, +			   enum kvm_mr_change change) +{ +	struct kvm_memory_slot *slot; +	struct kvm_memslots *slots; +	int r; + +	slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change); +	if (!slots) +		return -ENOMEM; + +	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { +		/* +		 * Note, the INVALID flag needs to be in the appropriate entry +		 * in the freshly allocated memslots, not in @old or @new. +		 */ +		slot = id_to_memslot(slots, old->id); +		slot->flags |= KVM_MEMSLOT_INVALID; + +		/* +		 * We can re-use the old memslots, the only difference from the +		 * newly installed memslots is the invalid flag, which will get +		 * dropped by update_memslots anyway.  We'll also revert to the +		 * old memslots if preparing the new memory region fails. +		 */ +		slots = install_new_memslots(kvm, as_id, slots); + +		/* From this point no new shadow pages pointing to a deleted, +		 * or moved, memslot will be created. 
+		 * +		 * validation of sp->gfn happens in: +		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn) +		 *	- kvm_is_visible_gfn (mmu_check_root) +		 */ +		kvm_arch_flush_shadow_memslot(kvm, slot); +	} + +	r = kvm_arch_prepare_memory_region(kvm, new, mem, change); +	if (r) +		goto out_slots; + +	update_memslots(slots, new, change); +	slots = install_new_memslots(kvm, as_id, slots); + +	kvm_arch_commit_memory_region(kvm, mem, old, new, change); + +	kvfree(slots); +	return 0; + +out_slots: +	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) +		slots = install_new_memslots(kvm, as_id, slots); +	kvfree(slots); +	return r; +} + +static int kvm_delete_memslot(struct kvm *kvm, +			      const struct kvm_userspace_memory_region *mem, +			      struct kvm_memory_slot *old, int as_id) +{ +	struct kvm_memory_slot new; +	int r; + +	if (!old->npages) +		return -EINVAL; + +	memset(&new, 0, sizeof(new)); +	new.id = old->id; + +	r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE); +	if (r) +		return r; + +	kvm_free_memslot(kvm, old); +	return 0; +} + +/*   * Allocate some memory and give it an address in the guest physical address   * space.   * @@ -994,162 +1196,118 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,  int __kvm_set_memory_region(struct kvm *kvm,  			    const struct kvm_userspace_memory_region *mem)  { -	int r; -	gfn_t base_gfn; -	unsigned long npages; -	struct kvm_memory_slot *slot;  	struct kvm_memory_slot old, new; -	struct kvm_memslots *slots = NULL, *old_memslots; -	int as_id, id; +	struct kvm_memory_slot *tmp;  	enum kvm_mr_change change; +	int as_id, id; +	int r;  	r = check_memory_region_flags(mem);  	if (r) -		goto out; +		return r; -	r = -EINVAL;  	as_id = mem->slot >> 16;  	id = (u16)mem->slot;  	/* General sanity checks */  	if (mem->memory_size & (PAGE_SIZE - 1)) -		goto out; +		return -EINVAL;  	if (mem->guest_phys_addr & (PAGE_SIZE - 1)) -		goto out; +		return -EINVAL;  	/* We can read the guest memory with __xxx_user() later on. */  	if ((id < KVM_USER_MEM_SLOTS) &&  	    ((mem->userspace_addr & (PAGE_SIZE - 1)) ||  	     !access_ok((void __user *)(unsigned long)mem->userspace_addr,  			mem->memory_size))) -		goto out; +		return -EINVAL;  	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) -		goto out; +		return -EINVAL;  	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) -		goto out; - -	slot = id_to_memslot(__kvm_memslots(kvm, as_id), id); -	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; -	npages = mem->memory_size >> PAGE_SHIFT; +		return -EINVAL; -	if (npages > KVM_MEM_MAX_NR_PAGES) -		goto out; +	/* +	 * Make a full copy of the old memslot, the pointer will become stale +	 * when the memslots are re-sorted by update_memslots(), and the old +	 * memslot needs to be referenced after calling update_memslots(), e.g. +	 * to free its resources and for arch specific behavior. +	 */ +	tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id); +	if (tmp) { +		old = *tmp; +		tmp = NULL; +	} else { +		memset(&old, 0, sizeof(old)); +		old.id = id; +	} -	new = old = *slot; +	if (!mem->memory_size) +		return kvm_delete_memslot(kvm, mem, &old, as_id);  	new.id = id; -	new.base_gfn = base_gfn; -	new.npages = npages; +	new.base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; +	new.npages = mem->memory_size >> PAGE_SHIFT;  	new.flags = mem->flags; +	new.userspace_addr = mem->userspace_addr; -	if (npages) { -		if (!old.npages) -			change = KVM_MR_CREATE; -		else { /* Modify an existing slot. 
*/ -			if ((mem->userspace_addr != old.userspace_addr) || -			    (npages != old.npages) || -			    ((new.flags ^ old.flags) & KVM_MEM_READONLY)) -				goto out; +	if (new.npages > KVM_MEM_MAX_NR_PAGES) +		return -EINVAL; -			if (base_gfn != old.base_gfn) -				change = KVM_MR_MOVE; -			else if (new.flags != old.flags) -				change = KVM_MR_FLAGS_ONLY; -			else { /* Nothing to change. */ -				r = 0; -				goto out; -			} -		} -	} else { -		if (!old.npages) -			goto out; +	if (!old.npages) { +		change = KVM_MR_CREATE; +		new.dirty_bitmap = NULL; +		memset(&new.arch, 0, sizeof(new.arch)); +	} else { /* Modify an existing slot. */ +		if ((new.userspace_addr != old.userspace_addr) || +		    (new.npages != old.npages) || +		    ((new.flags ^ old.flags) & KVM_MEM_READONLY)) +			return -EINVAL; -		change = KVM_MR_DELETE; -		new.base_gfn = 0; -		new.flags = 0; +		if (new.base_gfn != old.base_gfn) +			change = KVM_MR_MOVE; +		else if (new.flags != old.flags) +			change = KVM_MR_FLAGS_ONLY; +		else /* Nothing to change. */ +			return 0; + +		/* Copy dirty_bitmap and arch from the current memslot. */ +		new.dirty_bitmap = old.dirty_bitmap; +		memcpy(&new.arch, &old.arch, sizeof(new.arch));  	}  	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {  		/* Check for overlaps */ -		r = -EEXIST; -		kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { -			if (slot->id == id) +		kvm_for_each_memslot(tmp, __kvm_memslots(kvm, as_id)) { +			if (tmp->id == id)  				continue; -			if (!((base_gfn + npages <= slot->base_gfn) || -			      (base_gfn >= slot->base_gfn + slot->npages))) -				goto out; +			if (!((new.base_gfn + new.npages <= tmp->base_gfn) || +			      (new.base_gfn >= tmp->base_gfn + tmp->npages))) +				return -EEXIST;  		}  	} -	/* Free page dirty bitmap if unneeded */ +	/* Allocate/free page dirty bitmap as needed */  	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))  		new.dirty_bitmap = NULL; +	else if (!new.dirty_bitmap) { +		r = kvm_alloc_dirty_bitmap(&new); +		if (r) +			return r; -	r = -ENOMEM; -	if (change == KVM_MR_CREATE) { -		new.userspace_addr = mem->userspace_addr; - -		if (kvm_arch_create_memslot(kvm, &new, npages)) -			goto out_free; -	} - -	/* Allocate page dirty bitmap if needed */ -	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { -		if (kvm_create_dirty_bitmap(&new) < 0) -			goto out_free; -	} - -	slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); -	if (!slots) -		goto out_free; -	memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); - -	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { -		slot = id_to_memslot(slots, id); -		slot->flags |= KVM_MEMSLOT_INVALID; - -		old_memslots = install_new_memslots(kvm, as_id, slots); - -		/* From this point no new shadow pages pointing to a deleted, -		 * or moved, memslot will be created. -		 * -		 * validation of sp->gfn happens in: -		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn) -		 *	- kvm_is_visible_gfn (mmu_check_root) -		 */ -		kvm_arch_flush_shadow_memslot(kvm, slot); - -		/* -		 * We can re-use the old_memslots from above, the only difference -		 * from the currently installed memslots is the invalid flag.  This -		 * will get overwritten by update_memslots anyway. 
-		 */ -		slots = old_memslots; +		if (kvm_dirty_log_manual_protect_and_init_set(kvm)) +			bitmap_set(new.dirty_bitmap, 0, new.npages);  	} -	r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); +	r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);  	if (r) -		goto out_slots; - -	/* actual memory is freed via old in kvm_free_memslot below */ -	if (change == KVM_MR_DELETE) { -		new.dirty_bitmap = NULL; -		memset(&new.arch, 0, sizeof(new.arch)); -	} - -	update_memslots(slots, &new, change); -	old_memslots = install_new_memslots(kvm, as_id, slots); - -	kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); +		goto out_bitmap; -	kvm_free_memslot(kvm, &old, &new); -	kvfree(old_memslots); +	if (old.dirty_bitmap && !new.dirty_bitmap) +		kvm_destroy_dirty_bitmap(&old);  	return 0; -out_slots: -	kvfree(slots); -out_free: -	kvm_free_memslot(kvm, &new, &old); -out: +out_bitmap: +	if (new.dirty_bitmap && !old.dirty_bitmap) +		kvm_destroy_dirty_bitmap(&new);  	return r;  }  EXPORT_SYMBOL_GPL(__kvm_set_memory_region); @@ -1175,31 +1333,43 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,  	return kvm_set_memory_region(kvm, mem);  } -int kvm_get_dirty_log(struct kvm *kvm, -			struct kvm_dirty_log *log, int *is_dirty) +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +/** + * kvm_get_dirty_log - get a snapshot of dirty pages + * @kvm:	pointer to kvm instance + * @log:	slot id and address to which we copy the log + * @is_dirty:	set to '1' if any dirty pages were found + * @memslot:	set to the associated memslot, always valid on success + */ +int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, +		      int *is_dirty, struct kvm_memory_slot **memslot)  {  	struct kvm_memslots *slots; -	struct kvm_memory_slot *memslot;  	int i, as_id, id;  	unsigned long n;  	unsigned long any = 0; +	*memslot = NULL; +	*is_dirty = 0; +  	as_id = log->slot >> 16;  	id = (u16)log->slot;  	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)  		return -EINVAL;  	slots = __kvm_memslots(kvm, as_id); -	memslot = id_to_memslot(slots, id); -	if (!memslot->dirty_bitmap) +	*memslot = id_to_memslot(slots, id); +	if (!(*memslot) || !(*memslot)->dirty_bitmap)  		return -ENOENT; -	n = kvm_dirty_bitmap_bytes(memslot); +	kvm_arch_sync_dirty_log(kvm, *memslot); + +	n = kvm_dirty_bitmap_bytes(*memslot);  	for (i = 0; !any && i < n/sizeof(long); ++i) -		any = memslot->dirty_bitmap[i]; +		any = (*memslot)->dirty_bitmap[i]; -	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) +	if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n))  		return -EFAULT;  	if (any) @@ -1208,13 +1378,12 @@ int kvm_get_dirty_log(struct kvm *kvm,  }  EXPORT_SYMBOL_GPL(kvm_get_dirty_log); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +#else /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */  /**   * kvm_get_dirty_log_protect - get a snapshot of dirty pages   *	and reenable dirty page tracking for the corresponding pages.   * @kvm:	pointer to kvm instance   * @log:	slot id and address to which we copy the log - * @flush:	true if TLB flush is needed by caller   *   * We need to keep it in mind that VCPU threads can write to the bitmap   * concurrently. So, to avoid losing track of dirty pages we keep the @@ -1231,8 +1400,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);   * exiting to userspace will be logged for the next call.   
*   */ -int kvm_get_dirty_log_protect(struct kvm *kvm, -			struct kvm_dirty_log *log, bool *flush) +static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)  {  	struct kvm_memslots *slots;  	struct kvm_memory_slot *memslot; @@ -1240,6 +1408,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,  	unsigned long n;  	unsigned long *dirty_bitmap;  	unsigned long *dirty_bitmap_buffer; +	bool flush;  	as_id = log->slot >> 16;  	id = (u16)log->slot; @@ -1248,13 +1417,15 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,  	slots = __kvm_memslots(kvm, as_id);  	memslot = id_to_memslot(slots, id); +	if (!memslot || !memslot->dirty_bitmap) +		return -ENOENT;  	dirty_bitmap = memslot->dirty_bitmap; -	if (!dirty_bitmap) -		return -ENOENT; + +	kvm_arch_sync_dirty_log(kvm, memslot);  	n = kvm_dirty_bitmap_bytes(memslot); -	*flush = false; +	flush = false;  	if (kvm->manual_dirty_log_protect) {  		/*  		 * Unlike kvm_get_dirty_log, we always return false in *flush, @@ -1277,7 +1448,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,  			if (!dirty_bitmap[i])  				continue; -			*flush = true; +			flush = true;  			mask = xchg(&dirty_bitmap[i], 0);  			dirty_bitmap_buffer[i] = mask; @@ -1288,21 +1459,55 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,  		spin_unlock(&kvm->mmu_lock);  	} +	if (flush) +		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); +  	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))  		return -EFAULT;  	return 0;  } -EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); + + +/** + * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log + * + * Steps 1-4 below provide general overview of dirty page logging. See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed  and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. + * + *   1. Take a snapshot of the bit and clear it if needed. + *   2. Write protect the corresponding page. + *   3. Copy the snapshot to the userspace. + *   4. Flush TLB's if needed. + */ +static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, +				      struct kvm_dirty_log *log) +{ +	int r; + +	mutex_lock(&kvm->slots_lock); + +	r = kvm_get_dirty_log_protect(kvm, log); + +	mutex_unlock(&kvm->slots_lock); +	return r; +}  /**   * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap   *	and reenable dirty page tracking for the corresponding pages.   
* @kvm:	pointer to kvm instance   * @log:	slot id and address from which to fetch the bitmap of dirty pages - * @flush:	true if TLB flush is needed by caller   */ -int kvm_clear_dirty_log_protect(struct kvm *kvm, -				struct kvm_clear_dirty_log *log, bool *flush) +static int kvm_clear_dirty_log_protect(struct kvm *kvm, +				       struct kvm_clear_dirty_log *log)  {  	struct kvm_memslots *slots;  	struct kvm_memory_slot *memslot; @@ -1311,6 +1516,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,  	unsigned long i, n;  	unsigned long *dirty_bitmap;  	unsigned long *dirty_bitmap_buffer; +	bool flush;  	as_id = log->slot >> 16;  	id = (u16)log->slot; @@ -1322,10 +1528,10 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,  	slots = __kvm_memslots(kvm, as_id);  	memslot = id_to_memslot(slots, id); +	if (!memslot || !memslot->dirty_bitmap) +		return -ENOENT;  	dirty_bitmap = memslot->dirty_bitmap; -	if (!dirty_bitmap) -		return -ENOENT;  	n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; @@ -1334,7 +1540,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,  	    (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63)))  	    return -EINVAL; -	*flush = false; +	kvm_arch_sync_dirty_log(kvm, memslot); + +	flush = false;  	dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);  	if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))  		return -EFAULT; @@ -1357,28 +1565,32 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,  		 * a problem if userspace sets them in log->dirty_bitmap.  		*/  		if (mask) { -			*flush = true; +			flush = true;  			kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,  								offset, mask);  		}  	}  	spin_unlock(&kvm->mmu_lock); +	if (flush) +		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); +  	return 0;  } -EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect); -#endif -bool kvm_largepages_enabled(void) +static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, +					struct kvm_clear_dirty_log *log)  { -	return largepages_enabled; -} +	int r; -void kvm_disable_largepages(void) -{ -	largepages_enabled = false; +	mutex_lock(&kvm->slots_lock); + +	r = kvm_clear_dirty_log_protect(kvm, log); + +	mutex_unlock(&kvm->slots_lock); +	return r;  } -EXPORT_SYMBOL_GPL(kvm_disable_largepages); +#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */  struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)  { @@ -1754,12 +1966,6 @@ kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)  }  EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); -kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) -{ -	return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn); -} -EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); -  kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn)  {  	return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); @@ -3310,9 +3516,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)  	case KVM_CAP_IOEVENTFD_ANY_LENGTH:  	case KVM_CAP_CHECK_EXTENSION_VM:  	case KVM_CAP_ENABLE_CAP_VM: -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: -#endif  		return 1;  #ifdef CONFIG_KVM_MMIO  	case KVM_CAP_COALESCED_MMIO: @@ -3320,6 +3523,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)  	case KVM_CAP_COALESCED_PIO:  		return 1;  #endif +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: +		return KVM_DIRTY_LOG_MANUAL_CAPS; +#endif  #ifdef 
CONFIG_HAVE_KVM_IRQ_ROUTING  	case KVM_CAP_IRQ_ROUTING:  		return KVM_MAX_IRQ_ROUTES; @@ -3347,11 +3554,17 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,  {  	switch (cap->cap) {  #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: -		if (cap->flags || (cap->args[0] & ~1)) +	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: { +		u64 allowed_options = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE; + +		if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) +			allowed_options = KVM_DIRTY_LOG_MANUAL_CAPS; + +		if (cap->flags || (cap->args[0] & ~allowed_options))  			return -EINVAL;  		kvm->manual_dirty_log_protect = cap->args[0];  		return 0; +	}  #endif  	default:  		return kvm_vm_ioctl_enable_cap(kvm, cap); @@ -4435,14 +4648,22 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)          return &kvm_running_vcpu;  } -static void check_processor_compat(void *rtn) +struct kvm_cpu_compat_check { +	void *opaque; +	int *ret; +}; + +static void check_processor_compat(void *data)  { -	*(int *)rtn = kvm_arch_check_processor_compat(); +	struct kvm_cpu_compat_check *c = data; + +	*c->ret = kvm_arch_check_processor_compat(c->opaque);  }  int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,  		  struct module *module)  { +	struct kvm_cpu_compat_check c;  	int r;  	int cpu; @@ -4466,12 +4687,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,  		goto out_free_0;  	} -	r = kvm_arch_hardware_setup(); +	r = kvm_arch_hardware_setup(opaque);  	if (r < 0)  		goto out_free_1; +	c.ret = &r; +	c.opaque = opaque;  	for_each_online_cpu(cpu) { -		smp_call_function_single(cpu, check_processor_compat, &r, 1); +		smp_call_function_single(cpu, check_processor_compat, &c, 1);  		if (r < 0)  			goto out_free_2;  	}  |
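
Editor's note: the diff above only changes how the memslots array is kept sorted; the lookup side lives in include/linux/kvm_host.h and is not part of these hunks. The following is a minimal, self-contained sketch of the kind of descending-GFN binary search (with an LRU-style hint) that this ordering enables. The demo_* types, field names, and values are stand-ins invented for illustration, not the kernel's structures.

/*
 * Simplified sketch of a GFN -> memslot lookup over an array sorted from
 * highest base_gfn to lowest, as described in the update_memslots() comment
 * above.  Modeled loosely on the kernel's search_memslots(), but it uses
 * stand-in types so it can be compiled and tested in isolation.
 */
#include <stddef.h>
#include <stdio.h>

typedef unsigned long long gfn_t;

struct demo_memslot {
	gfn_t base_gfn;
	unsigned long npages;
};

struct demo_memslots {
	int used_slots;
	int lru_slot;                 /* index of the last slot that matched */
	struct demo_memslot slots[8]; /* sorted: highest base_gfn first */
};

static struct demo_memslot *demo_gfn_to_memslot(struct demo_memslots *ms, gfn_t gfn)
{
	int start = 0, end = ms->used_slots;
	int idx = ms->lru_slot;
	struct demo_memslot *s = &ms->slots[idx];

	/* Fast path: the previously used slot often matches again. */
	if (idx < ms->used_slots &&
	    gfn >= s->base_gfn && gfn < s->base_gfn + s->npages)
		return s;

	/* Binary search; the array is ordered by descending base_gfn. */
	while (start < end) {
		idx = (start + end) / 2;
		if (gfn >= ms->slots[idx].base_gfn)
			end = idx;
		else
			start = idx + 1;
	}

	s = &ms->slots[start];
	if (start < ms->used_slots &&
	    gfn >= s->base_gfn && gfn < s->base_gfn + s->npages) {
		ms->lru_slot = start;
		return s;
	}
	return NULL;
}

int main(void)
{
	struct demo_memslots ms = {
		.used_slots = 2,
		.slots = {
			{ .base_gfn = 0x100000, .npages = 0x40000 }, /* RAM above 4GiB */
			{ .base_gfn = 0x0,      .npages = 0xc0000 }, /* low RAM */
		},
	};

	printf("%p %p\n",
	       (void *)demo_gfn_to_memslot(&ms, 0x1000),
	       (void *)demo_gfn_to_memslot(&ms, 0x123456));
	return 0;
}

The kernel's version keeps the hint in an atomic lru_slot and is shared by gfn_to_memslot() and kvm_vcpu_gfn_to_memslot(); the sketch keeps a plain int for brevity.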
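Editor's note: the KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 hunks above make KVM_CHECK_EXTENSION report a bitmask of supported modes (KVM_DIRTY_LOG_MANUAL_CAPS) rather than a plain 0/1, and widen what KVM_ENABLE_CAP accepts. Below is a hedged userspace sketch of how a VMM might probe and enable it; it assumes a <linux/kvm.h> from the series that adds kvm_dirty_log_manual_protect_and_init_set(), and an already-created VM file descriptor vm_fd. It is illustrative, not taken from any particular VMM.

/*
 * Probe and enable manual dirty-log protection on a VM fd.  The fallback
 * definition of KVM_DIRTY_LOG_INITIALLY_SET is for older uapi headers and is
 * assumed to match the value used by the series above.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

#ifndef KVM_DIRTY_LOG_INITIALLY_SET
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
#endif

static int enable_manual_dirty_log(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 };
	int supported;

	supported = ioctl(vm_fd, KVM_CHECK_EXTENSION,
			  KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	if (supported <= 0)
		return -1;	/* capability not available */

	/* Always request manual protect; add "initially set" only if offered. */
	cap.args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE;
	if (supported & KVM_DIRTY_LOG_INITIALLY_SET)
		cap.args[0] |= KVM_DIRTY_LOG_INITIALLY_SET;

	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		return -1;

	fprintf(stderr, "manual dirty log enabled, modes 0x%llx\n",
		(unsigned long long)cap.args[0]);
	return 0;
}

When the "initially set" mode is granted, the dirty bitmap allocated in __kvm_set_memory_region() starts fully set (see the bitmap_set() call in the diff), so userspace is expected to clear ranges with KVM_CLEAR_DIRTY_LOG before relying on the log contents.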