Diffstat (limited to 'mm/mmu_notifier.c')
-rw-r--r--	mm/mmu_notifier.c	263
1 file changed, 202 insertions, 61 deletions
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index b5670620aea0..7fde88695f35 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -21,17 +21,11 @@
 /* global SRCU for all MMs */
 DEFINE_STATIC_SRCU(srcu);
 
-/*
- * This function allows mmu_notifier::release callback to delay a call to
- * a function that will free appropriate resources. The function must be
- * quick and must not block.
- */
-void mmu_notifier_call_srcu(struct rcu_head *rcu,
-			    void (*func)(struct rcu_head *rcu))
-{
-	call_srcu(&srcu, rcu, func);
-}
-EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map __mmu_notifier_invalidate_range_start_map = {
+	.name = "mmu_notifier_invalidate_range_start"
+};
+#endif
 
 /*
  * This function can't run concurrently against mmu_notifier_register
@@ -174,11 +168,19 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &range->mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->invalidate_range_start) {
-			int _ret = mn->ops->invalidate_range_start(mn, range);
+			int _ret;
+
+			if (!mmu_notifier_range_blockable(range))
+				non_block_start();
+			_ret = mn->ops->invalidate_range_start(mn, range);
+			if (!mmu_notifier_range_blockable(range))
+				non_block_end();
 			if (_ret) {
 				pr_info("%pS callback failed with %d in %sblockable context.\n",
 					mn->ops->invalidate_range_start, _ret,
 					!mmu_notifier_range_blockable(range) ? "non-" : "");
+				WARN_ON(mmu_notifier_range_blockable(range) ||
+					ret != -EAGAIN);
 				ret = _ret;
 			}
 		}
@@ -187,7 +189,6 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
 
 void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
 					 bool only_end)
@@ -195,6 +196,7 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
 	struct mmu_notifier *mn;
 	int id;
 
+	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &range->mm->mmu_notifier_mm->list, hlist) {
 		/*
@@ -214,12 +216,17 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
 			mn->ops->invalidate_range(mn, range->mm,
 						  range->start,
 						  range->end);
-		if (mn->ops->invalidate_range_end)
+		if (mn->ops->invalidate_range_end) {
+			if (!mmu_notifier_range_blockable(range))
+				non_block_start();
 			mn->ops->invalidate_range_end(mn, range);
+			if (!mmu_notifier_range_blockable(range))
+				non_block_end();
+		}
 	}
 	srcu_read_unlock(&srcu, id);
+	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end);
 
 void __mmu_notifier_invalidate_range(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
@@ -234,35 +241,49 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
 	}
 	srcu_read_unlock(&srcu, id);
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range);
 
-static int do_mmu_notifier_register(struct mmu_notifier *mn,
-				    struct mm_struct *mm,
-				    int take_mmap_sem)
+/*
+ * Same as mmu_notifier_register but here the caller must hold the
+ * mmap_sem in write mode.
+ */
+int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 {
-	struct mmu_notifier_mm *mmu_notifier_mm;
+	struct mmu_notifier_mm *mmu_notifier_mm = NULL;
 	int ret;
 
+	lockdep_assert_held_write(&mm->mmap_sem);
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
-	ret = -ENOMEM;
-	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
-	if (unlikely(!mmu_notifier_mm))
-		goto out;
+	if (IS_ENABLED(CONFIG_LOCKDEP)) {
+		fs_reclaim_acquire(GFP_KERNEL);
+		lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
+		lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+		fs_reclaim_release(GFP_KERNEL);
+	}
 
-	if (take_mmap_sem)
-		down_write(&mm->mmap_sem);
-	ret = mm_take_all_locks(mm);
-	if (unlikely(ret))
-		goto out_clean;
+	mn->mm = mm;
+	mn->users = 1;
+
+	if (!mm->mmu_notifier_mm) {
+		/*
+		 * kmalloc cannot be called under mm_take_all_locks(), but we
+		 * know that mm->mmu_notifier_mm can't change while we hold
+		 * the write side of the mmap_sem.
+		 */
+		mmu_notifier_mm =
+			kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
+		if (!mmu_notifier_mm)
+			return -ENOMEM;
 
-	if (!mm_has_notifiers(mm)) {
 		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
 		spin_lock_init(&mmu_notifier_mm->lock);
-
-		mm->mmu_notifier_mm = mmu_notifier_mm;
-		mmu_notifier_mm = NULL;
 	}
+
+	ret = mm_take_all_locks(mm);
+	if (unlikely(ret))
+		goto out_clean;
+
+	/* Pairs with the mmdrop in mmu_notifier_unregister_* */
 	mmgrab(mm);
 
 	/*
@@ -273,48 +294,118 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 	 * We can't race against any other mmu notifier method either
 	 * thanks to mm_take_all_locks().
 	 */
+	if (mmu_notifier_mm)
+		mm->mmu_notifier_mm = mmu_notifier_mm;
+
 	spin_lock(&mm->mmu_notifier_mm->lock);
 	hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier_mm->list);
 	spin_unlock(&mm->mmu_notifier_mm->lock);
 
 	mm_drop_all_locks(mm);
+	BUG_ON(atomic_read(&mm->mm_users) <= 0);
+	return 0;
+
 out_clean:
-	if (take_mmap_sem)
-		up_write(&mm->mmap_sem);
 	kfree(mmu_notifier_mm);
-out:
-	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(__mmu_notifier_register);
 
-/*
+/**
+ * mmu_notifier_register - Register a notifier on a mm
+ * @mn: The notifier to attach
+ * @mm: The mm to attach the notifier to
+ *
  * Must not hold mmap_sem nor any other VM related lock when calling
  * this registration function. Must also ensure mm_users can't go down
  * to zero while this runs to avoid races with mmu_notifier_release,
  * so mm has to be current->mm or the mm should be pinned safely such
  * as with get_task_mm(). If the mm is not current->mm, the mm_users
  * pin should be released by calling mmput after mmu_notifier_register
- * returns. mmu_notifier_unregister must be always called to
- * unregister the notifier. mm_count is automatically pinned to allow
- * mmu_notifier_unregister to safely run at any time later, before or
- * after exit_mmap. ->release will always be called before exit_mmap
- * frees the pages.
+ * returns.
+ *
+ * mmu_notifier_unregister() or mmu_notifier_put() must be always called to
+ * unregister the notifier.
+ *
+ * While the caller has a mmu_notifier get the mn->mm pointer will remain
+ * valid, and can be converted to an active mm pointer via mmget_not_zero().
  */
 int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 {
-	return do_mmu_notifier_register(mn, mm, 1);
+	int ret;
+
+	down_write(&mm->mmap_sem);
+	ret = __mmu_notifier_register(mn, mm);
+	up_write(&mm->mmap_sem);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_register);
 
-/*
- * Same as mmu_notifier_register but here the caller must hold the
- * mmap_sem in write mode.
+static struct mmu_notifier *
+find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
+{
+	struct mmu_notifier *mn;
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	hlist_for_each_entry_rcu (mn, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops != ops)
+			continue;
+
+		if (likely(mn->users != UINT_MAX))
+			mn->users++;
+		else
+			mn = ERR_PTR(-EOVERFLOW);
+		spin_unlock(&mm->mmu_notifier_mm->lock);
+		return mn;
+	}
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+	return NULL;
+}
+
+/**
+ * mmu_notifier_get_locked - Return the single struct mmu_notifier for
+ *                           the mm & ops
+ * @ops: The operations struct being subscribe with
+ * @mm : The mm to attach notifiers too
+ *
+ * This function either allocates a new mmu_notifier via
+ * ops->alloc_notifier(), or returns an already existing notifier on the
+ * list. The value of the ops pointer is used to determine when two notifiers
+ * are the same.
+ *
+ * Each call to mmu_notifier_get() must be paired with a call to
+ * mmu_notifier_put(). The caller must hold the write side of mm->mmap_sem.
+ *
+ * While the caller has a mmu_notifier get the mm pointer will remain valid,
+ * and can be converted to an active mm pointer via mmget_not_zero().
  */
-int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+					     struct mm_struct *mm)
 {
-	return do_mmu_notifier_register(mn, mm, 0);
+	struct mmu_notifier *mn;
+	int ret;
+
+	lockdep_assert_held_write(&mm->mmap_sem);
+
+	if (mm->mmu_notifier_mm) {
+		mn = find_get_mmu_notifier(mm, ops);
+		if (mn)
+			return mn;
+	}
+
+	mn = ops->alloc_notifier(mm);
+	if (IS_ERR(mn))
+		return mn;
+	mn->ops = ops;
+	ret = __mmu_notifier_register(mn, mm);
+	if (ret)
+		goto out_free;
+	return mn;
+out_free:
+	mn->ops->free_notifier(mn);
+	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(__mmu_notifier_register);
+EXPORT_SYMBOL_GPL(mmu_notifier_get_locked);
 
 /* this is called after the last mmu_notifier_unregister() returned */
 void __mmu_notifier_mm_destroy(struct mm_struct *mm)
@@ -375,24 +466,74 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
 
-/*
- * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
+static void mmu_notifier_free_rcu(struct rcu_head *rcu)
+{
+	struct mmu_notifier *mn = container_of(rcu, struct mmu_notifier, rcu);
+	struct mm_struct *mm = mn->mm;
+
+	mn->ops->free_notifier(mn);
+	/* Pairs with the get in __mmu_notifier_register() */
+	mmdrop(mm);
+}
+
+/**
+ * mmu_notifier_put - Release the reference on the notifier
+ * @mn: The notifier to act on
+ *
+ * This function must be paired with each mmu_notifier_get(), it releases the
+ * reference obtained by the get. If this is the last reference then process
+ * to free the notifier will be run asynchronously.
+ *
+ * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release
+ * when the mm_struct is destroyed. Instead free_notifier is always called to
+ * release any resources held by the user.
+ *
+ * As ops->release is not guaranteed to be called, the user must ensure that
+ * all sptes are dropped, and no new sptes can be established before
+ * mmu_notifier_put() is called.
+ *
+ * This function can be called from the ops->release callback, however the
+ * caller must still ensure it is called pairwise with mmu_notifier_get().
+ *
+ * Modules calling this function must call mmu_notifier_synchronize() in
+ * their __exit functions to ensure the async work is completed.
  */
-void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
-					struct mm_struct *mm)
+void mmu_notifier_put(struct mmu_notifier *mn)
 {
+	struct mm_struct *mm = mn->mm;
+
 	spin_lock(&mm->mmu_notifier_mm->lock);
-	/*
-	 * Can not use list_del_rcu() since __mmu_notifier_release
-	 * can delete it before we hold the lock.
-	 */
+	if (WARN_ON(!mn->users) || --mn->users)
+		goto out_unlock;
 	hlist_del_init_rcu(&mn->hlist);
 	spin_unlock(&mm->mmu_notifier_mm->lock);
 
-	BUG_ON(atomic_read(&mm->mm_count) <= 0);
-	mmdrop(mm);
+	call_srcu(&srcu, &mn->rcu, mmu_notifier_free_rcu);
+	return;
+
+out_unlock:
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_put);
+
+/**
+ * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed
+ *
+ * This function ensures that all outstanding async SRU work from
+ * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops
+ * associated with an unused mmu_notifier will no longer be called.
+ *
+ * Before using the caller must ensure that all of its mmu_notifiers have been
+ * fully released via mmu_notifier_put().
+ *
+ * Modules using the mmu_notifier_put() API should call this in their __exit
+ * function to avoid module unloading races.
+ */
+void mmu_notifier_synchronize(void)
+{
+	synchronize_srcu(&srcu);
 }
-EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
 
 bool
 mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range)
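Editor's note: the hunks above introduce a reference-counted registration flow (mmu_notifier_get_locked(), mmu_notifier_put(), mmu_notifier_synchronize()) alongside the existing register/unregister API. What follows is a minimal, hypothetical sketch of how a driver module might consume that interface; it is not part of the patch, and every example_* name is invented for illustration. Only the mmu_notifier_* calls themselves come from the code above.

/*
 * Illustrative sketch only -- not part of the commit above.
 * All example_* identifiers are hypothetical.
 */
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/module.h>
#include <linux/slab.h>

struct example_notifier {
	struct mmu_notifier mn;		/* embedded so container_of() works */
	/* driver-private state would live here */
};

/* Called by mmu_notifier_get_locked() when no notifier with these ops is
 * registered on the mm yet. */
static struct mmu_notifier *example_alloc_notifier(struct mm_struct *mm)
{
	struct example_notifier *en = kzalloc(sizeof(*en), GFP_KERNEL);

	return en ? &en->mn : ERR_PTR(-ENOMEM);
}

/* Always called (via SRCU) after the last mmu_notifier_put(); ops->release
 * is not guaranteed to run, so cleanup belongs here. */
static void example_free_notifier(struct mmu_notifier *mn)
{
	kfree(container_of(mn, struct example_notifier, mn));
}

static const struct mmu_notifier_ops example_ops = {
	.alloc_notifier = example_alloc_notifier,
	.free_notifier = example_free_notifier,
};

/* Obtain the single notifier for this mm & ops, registering it on first use.
 * mmu_notifier_get_locked() requires mmap_sem held for write. */
static struct example_notifier *example_get(struct mm_struct *mm)
{
	struct mmu_notifier *mn;

	down_write(&mm->mmap_sem);
	mn = mmu_notifier_get_locked(&example_ops, mm);
	up_write(&mm->mmap_sem);
	if (IS_ERR(mn))
		return ERR_CAST(mn);
	return container_of(mn, struct example_notifier, mn);
}

/* Pairs with example_get(); freeing happens asynchronously via SRCU. */
static void example_put(struct example_notifier *en)
{
	mmu_notifier_put(&en->mn);
}

static int __init example_init(void)
{
	return 0;
}

static void __exit example_exit(void)
{
	/* Flush the async free_notifier work before the module text goes away. */
	mmu_notifier_synchronize();
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");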