From 5bc9ad78c2f836bd2fe9b5c911f8499364ee5b6e Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Thu, 18 Jul 2024 17:18:37 +0200 Subject: vfs: handle __wait_on_freeing_inode() and evict() race Lockless hash lookup can find and lock the inode after it gets the I_FREEING flag set, at which point it blocks waiting for teardown in evict() to finish. However, the flag is still set even after evict() wakes up all waiters. This results in a race where if the inode lock is taken late enough, it can happen after both hash removal and wakeups, meaning there is nobody to wake the racing thread up. This worked prior to RCU-based lookup because the entire ordeal was synchronized with the inode hash lock. Since unhashing requires the inode lock, we can safely check whether it happened after acquiring it. Link: https://lore.kernel.org/v9fs/20240717102458.649b60be@kernel.org/ Reported-by: Dominique Martinet Fixes: 7180f8d91fcb ("vfs: add rcu-based find_inode variants for iget ops") Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240718151838.611807-1-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/inode.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index f356fe2ec2b6..05613745fad6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -676,6 +676,16 @@ static void evict(struct inode *inode) remove_inode_hash(inode); + /* + * Wake up waiters in __wait_on_freeing_inode(). + * + * Lockless hash lookup may end up finding the inode before we removed + * it above, but only lock it *after* we are done with the wakeup below. + * In this case the potential waiter cannot safely block. + * + * The inode being unhashed after the call to remove_inode_hash() is + * used as an indicator whether blocking on it is safe. + */ spin_lock(&inode->i_lock); wake_up_bit(&inode->i_state, __I_NEW); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); @@ -2291,6 +2301,16 @@ static void __wait_on_freeing_inode(struct inode *inode, bool locked) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + + /* + * Handle racing against evict(), see that routine for more details. + */ + if (unlikely(inode_unhashed(inode))) { + WARN_ON(locked); + spin_unlock(&inode->i_lock); + return; + } + wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); -- cgit From f5e5e97c719d289025afce07050effcf1f7373ef Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Jul 2024 10:50:33 +0200 Subject: inode: clarify what's locked In __wait_on_freeing_inode() we warn in case the inode_hash_lock is held but the inode is unhashed. We then release the inode_lock. So using "locked" as parameter name is confusing. Use is_inode_hash_locked as parameter name instead. Signed-off-by: Christian Brauner --- fs/inode.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 05613745fad6..470b57ef1cf5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -898,18 +898,18 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } -static void __wait_on_freeing_inode(struct inode *inode, bool locked); +static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked); /* * Called with the inode lock held. */ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), - void *data, bool locked) + void *data, bool is_inode_hash_locked) { struct inode *inode = NULL; - if (locked) + if (is_inode_hash_locked) lockdep_assert_held(&inode_hash_lock); else lockdep_assert_not_held(&inode_hash_lock); @@ -923,7 +923,7 @@ repeat: continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode, locked); + __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { @@ -946,11 +946,11 @@ repeat: */ static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino, - bool locked) + bool is_inode_hash_locked) { struct inode *inode = NULL; - if (locked) + if (is_inode_hash_locked) lockdep_assert_held(&inode_hash_lock); else lockdep_assert_not_held(&inode_hash_lock); @@ -964,7 +964,7 @@ repeat: continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode, locked); + __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { @@ -2297,7 +2297,7 @@ EXPORT_SYMBOL(inode_needs_sync); * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. */ -static void __wait_on_freeing_inode(struct inode *inode, bool locked) +static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); @@ -2306,7 +2306,7 @@ static void __wait_on_freeing_inode(struct inode *inode, bool locked) * Handle racing against evict(), see that routine for more details. */ if (unlikely(inode_unhashed(inode))) { - WARN_ON(locked); + WARN_ON(is_inode_hash_locked); spin_unlock(&inode->i_lock); return; } @@ -2315,11 +2315,11 @@ static void __wait_on_freeing_inode(struct inode *inode, bool locked) prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); rcu_read_unlock(); - if (locked) + if (is_inode_hash_locked) spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq, &wait.wq_entry); - if (locked) + if (is_inode_hash_locked) spin_lock(&inode_hash_lock); rcu_read_lock(); } -- cgit