Diffstat (limited to 'fs/inode.c')
-rw-r--r--	fs/inode.c	177
1 file changed, 138 insertions(+), 39 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index f356fe2ec2b6..af78f515403f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -107,7 +107,7 @@ long get_nr_dirty_inodes(void)
*/
static struct inodes_stat_t inodes_stat;
-static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
+static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
inode->i_state |= I_REFERENCED;
}
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+ struct inode *inode, u32 bit)
+{
+ void *bit_address;
+
+ bit_address = inode_state_wait_address(inode, bit);
+ init_wait_var_entry(wqe, bit_address, 0);
+ return __var_waitqueue(bit_address);
+}
+EXPORT_SYMBOL(inode_bit_waitqueue);
+
/*
* Add inode to LRU if needed (inode is unused and clean).
*
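
[Editor's note] The helper above packages the wait-bit plumbing for i_state
bits. A minimal consumer-side sketch (illustrative only; it mirrors
inode_wait_for_lru_isolating() introduced below and assumes the caller holds
inode->i_lock):

static void example_wait_on_state_bit(struct inode *inode, u32 bit)
{
	struct wait_bit_queue_entry wqe;
	struct wait_queue_head *wq_head;

	wq_head = inode_bit_waitqueue(&wqe, inode, bit);
	for (;;) {
		prepare_to_wait_event(wq_head, &wqe.wq_entry,
				      TASK_UNINTERRUPTIBLE);
		/* i_lock orders this check against the waker clearing the bit. */
		if (!(inode->i_state & BIT(bit)))
			break;
		spin_unlock(&inode->i_lock);
		schedule();
		spin_lock(&inode->i_lock);
	}
	finish_wait(wq_head, &wqe.wq_entry);
}
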
@@ -488,6 +499,49 @@ static void inode_lru_list_del(struct inode *inode)
this_cpu_dec(nr_unused);
}
+static void inode_pin_lru_isolating(struct inode *inode)
+{
+ lockdep_assert_held(&inode->i_lock);
+ WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
+ inode->i_state |= I_LRU_ISOLATING;
+}
+
+static void inode_unpin_lru_isolating(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
+ inode->i_state &= ~I_LRU_ISOLATING;
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_LRU_ISOLATING);
+ spin_unlock(&inode->i_lock);
+}
+
+static void inode_wait_for_lru_isolating(struct inode *inode)
+{
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ lockdep_assert_held(&inode->i_lock);
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ return;
+
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /*
+ * Checking I_LRU_ISOLATING with inode->i_lock guarantees
+ * memory ordering.
+ */
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ break;
+ spin_unlock(&inode->i_lock);
+ schedule();
+ spin_lock(&inode->i_lock);
+ }
+ finish_wait(wq_head, &wqe.wq_entry);
+ WARN_ON(inode->i_state & I_LRU_ISOLATING);
+}
+
/**
* inode_sb_list_add - add inode to the superblock list of inodes
* @inode: inode to add
@@ -562,6 +616,7 @@ void dump_mapping(const struct address_space *mapping)
struct hlist_node *dentry_first;
struct dentry *dentry_ptr;
struct dentry dentry;
+ char fname[64] = {};
unsigned long ino;
/*
@@ -598,11 +653,14 @@ void dump_mapping(const struct address_space *mapping)
return;
}
+ if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0)
+ strscpy(fname, "<invalid>");
/*
- * if dentry is corrupted, the %pd handler may still crash,
- * but it's unlikely that we reach here with a corrupt mapping
+ * Even if strncpy_from_kernel_nofault() succeeded,
+ * the fname could be unreliable
*/
- pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
+ pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
+ a_ops, ino, fname);
}
void clear_inode(struct inode *inode)
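
[Editor's note] The hunk above relies on strncpy_from_kernel_nofault()
returning a negative value when the source address cannot be read safely.
Reduced to its essentials (illustrative sketch; possibly_bad_ptr is a
hypothetical name):

	char name[64] = {};

	/* Copy at most 63 bytes; fall back to a marker if the source faults. */
	if (strncpy_from_kernel_nofault(name, possibly_bad_ptr, sizeof(name) - 1) < 0)
		strscpy(name, "<invalid>");
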
@@ -657,6 +715,9 @@ static void evict(struct inode *inode)
inode_sb_list_del(inode);
+ spin_lock(&inode->i_lock);
+ inode_wait_for_lru_isolating(inode);
+
/*
* Wait for flusher thread to be done with the inode so that filesystem
* does not start destroying it while writeback is still running. Since
@@ -664,6 +725,7 @@ static void evict(struct inode *inode)
* the inode. We just have to wait for running writeback to finish.
*/
inode_wait_for_writeback(inode);
+ spin_unlock(&inode->i_lock);
if (op->evict_inode) {
op->evict_inode(inode);
@@ -676,8 +738,24 @@ static void evict(struct inode *inode)
remove_inode_hash(inode);
+ /*
+ * Wake up waiters in __wait_on_freeing_inode().
+ *
+ * Lockless hash lookup may end up finding the inode before we removed
+ * it above, but only lock it *after* we are done with the wakeup below.
+ * In this case the potential waiter cannot safely block.
+ *
+ * The inode being unhashed after the call to remove_inode_hash() is
+ * used as an indicator whether blocking on it is safe.
+ */
spin_lock(&inode->i_lock);
- wake_up_bit(&inode->i_state, __I_NEW);
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
+ smp_mb();
+ inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock);
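
[Editor's note] The race the new comment guards against, laid out as an
interleaving (illustrative, not part of the patch):

/*
 *   lockless lookup (RCU)             evict()
 *   ---------------------             -------
 *   finds inode in hash chain
 *                                     remove_inode_hash(inode)
 *                                     smp_mb(); wake __I_NEW waiters
 *   spin_lock(&inode->i_lock)
 *   sees I_FREEING, wants to sleep
 *
 * The wakeup has already happened, so sleeping on __I_NEW now would never
 * end. __wait_on_freeing_inode() therefore checks inode_unhashed() first
 * and backs out instead of blocking (see the hunk near the end of this
 * patch).
 */
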
@@ -725,6 +803,10 @@ again:
continue;
spin_lock(&inode->i_lock);
+ if (atomic_read(&inode->i_count)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
spin_unlock(&inode->i_lock);
continue;
@@ -845,7 +927,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
* be under pressure before the cache inside the highmem zone.
*/
if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
- __iget(inode);
+ inode_pin_lru_isolating(inode);
spin_unlock(&inode->i_lock);
spin_unlock(lru_lock);
if (remove_inode_buffers(inode)) {
@@ -857,7 +939,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
__count_vm_events(PGINODESTEAL, reap);
mm_account_reclaimed_pages(reap);
}
- iput(inode);
+ inode_unpin_lru_isolating(inode);
spin_lock(lru_lock);
return LRU_RETRY;
}
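
[Editor's note] Why pin with a state bit rather than the old __iget()/iput()
pair (editor's reading of this hunk): the shrinker no longer manipulates
i_count, so it can never drop the last reference and trigger inode eviction
from reclaim context. The two sides now pair up as follows (illustrative):

/*
 *   inode_lru_isolate()                evict()
 *   -------------------                -------
 *   inode_pin_lru_isolating(inode);    spin_lock(&inode->i_lock);
 *   ...drop locks, trim buffers...     inode_wait_for_lru_isolating(inode);
 *   inode_unpin_lru_isolating(inode);      sleeps until the unpin wakes it
 */
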
@@ -888,18 +970,18 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
return freed;
}
-static void __wait_on_freeing_inode(struct inode *inode, bool locked);
+static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
/*
* Called with the inode lock held.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
int (*test)(struct inode *, void *),
- void *data, bool locked)
+ void *data, bool is_inode_hash_locked)
{
struct inode *inode = NULL;
- if (locked)
+ if (is_inode_hash_locked)
lockdep_assert_held(&inode_hash_lock);
else
lockdep_assert_not_held(&inode_hash_lock);
@@ -913,7 +995,7 @@ repeat:
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode, locked);
+ __wait_on_freeing_inode(inode, is_inode_hash_locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
@@ -936,11 +1018,11 @@ repeat:
*/
static struct inode *find_inode_fast(struct super_block *sb,
struct hlist_head *head, unsigned long ino,
- bool locked)
+ bool is_inode_hash_locked)
{
struct inode *inode = NULL;
- if (locked)
+ if (is_inode_hash_locked)
lockdep_assert_held(&inode_hash_lock);
else
lockdep_assert_not_held(&inode_hash_lock);
@@ -954,7 +1036,7 @@ repeat:
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode, locked);
+ __wait_on_freeing_inode(inode, is_inode_hash_locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
@@ -1085,8 +1167,13 @@ void unlock_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
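
[Editor's note] The smp_mb() comment recurs in several hunks; spelled out,
the pairing looks like this (illustrative, not part of the patch):

/*
 *   waker (here)                       waiter (___wait_var_event())
 *   ------------                       ----------------------------
 *   inode->i_state &= ~I_NEW;          prepare_to_wait_event(...);
 *   smp_mb();                              (implies a full barrier)
 *   inode_wake_up_bit():               recheck inode->i_state & I_NEW
 *     waitqueue_active()? -> wake up
 *
 * Either the waiter sees the cleared bit and skips sleeping, or the waker
 * sees the queued waiter and wakes it; the barriers rule out both sides
 * missing each other.
 */
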
@@ -1097,8 +1184,13 @@ void discard_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
iput(inode);
}
@@ -1525,9 +1617,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
again:
- spin_lock(&inode_hash_lock);
- inode = find_inode_fast(sb, head, ino, true);
- spin_unlock(&inode_hash_lock);
+ inode = find_inode_fast(sb, head, ino, false);
if (inode) {
if (IS_ERR(inode))
@@ -2287,19 +2377,29 @@ EXPORT_SYMBOL(inode_needs_sync);
* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
* will DTRT.
*/
-static void __wait_on_freeing_inode(struct inode *inode, bool locked)
+static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
- wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
- wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ /*
+ * Handle racing against evict(), see that routine for more details.
+ */
+ if (unlikely(inode_unhashed(inode))) {
+ WARN_ON(is_inode_hash_locked);
+ spin_unlock(&inode->i_lock);
+ return;
+ }
+
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
- if (locked)
+ if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
- if (locked)
+ finish_wait(wq_head, &wqe.wq_entry);
+ if (is_inode_hash_locked)
spin_lock(&inode_hash_lock);
rcu_read_lock();
}
@@ -2447,18 +2547,11 @@ EXPORT_SYMBOL(inode_owner_or_capable);
/*
* Direct i/o helper functions
*/
-static void __inode_dio_wait(struct inode *inode)
+bool inode_dio_finished(const struct inode *inode)
{
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
- DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
- do {
- prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
- if (atomic_read(&inode->i_dio_count))
- schedule();
- } while (atomic_read(&inode->i_dio_count));
- finish_wait(wq, &q.wq_entry);
+ return atomic_read(&inode->i_dio_count) == 0;
}
+EXPORT_SYMBOL(inode_dio_finished);
/**
* inode_dio_wait - wait for outstanding DIO requests to finish
@@ -2472,11 +2565,17 @@ static void __inode_dio_wait(struct inode *inode)
*/
void inode_dio_wait(struct inode *inode)
{
- if (atomic_read(&inode->i_dio_count))
- __inode_dio_wait(inode);
+ wait_var_event(&inode->i_dio_count, inode_dio_finished(inode));
}
EXPORT_SYMBOL(inode_dio_wait);
+void inode_dio_wait_interruptible(struct inode *inode)
+{
+ wait_var_event_interruptible(&inode->i_dio_count,
+ inode_dio_finished(inode));
+}
+EXPORT_SYMBOL(inode_dio_wait_interruptible);
+
/*
* inode_set_flags - atomically set some inode flags
*
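
[Editor's note] A hypothetical caller of the reworked DIO-wait API
(editor's sketch; example_setattr_size() is invented for illustration):
filesystems typically drain direct I/O before changing the file size.

static int example_setattr_size(struct inode *inode, loff_t newsize)
{
	/* Wait for in-flight direct I/O to drain (i_dio_count reaches 0). */
	inode_dio_wait(inode);
	truncate_setsize(inode, newsize);
	return 0;
}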