aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/delayed-inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/delayed-inode.c')
-rw-r--r--fs/btrfs/delayed-inode.c160
1 files changed, 95 insertions, 65 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6d51db066503..7381241334e8 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -313,7 +313,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
{
struct btrfs_delayed_item *item;
- item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
+ item = kmalloc(struct_size(item, data, data_len), GFP_NOFS);
if (item) {
item->data_len = data_len;
item->type = type;
@@ -328,7 +328,8 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
}
/*
- * __btrfs_lookup_delayed_item - look up the delayed item by key
+ * Look up the delayed item by key.
+ *
* @delayed_node: pointer to the delayed node
* @index: the dir index value to lookup (offset of a dir index key)
*
@@ -412,6 +413,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
+ struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
struct rb_root_cached *root;
struct btrfs_delayed_root *delayed_root;
@@ -419,18 +421,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
if (RB_EMPTY_NODE(&delayed_item->rb_node))
return;
- delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
+ /* If it's in a rbtree, then we need to have delayed node locked. */
+ lockdep_assert_held(&delayed_node->mutex);
+
+ delayed_root = delayed_node->root->fs_info->delayed_root;
BUG_ON(!delayed_root);
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
- root = &delayed_item->delayed_node->ins_root;
+ root = &delayed_node->ins_root;
else
- root = &delayed_item->delayed_node->del_root;
+ root = &delayed_node->del_root;
rb_erase_cached(&delayed_item->rb_node, root);
RB_CLEAR_NODE(&delayed_item->rb_node);
- delayed_item->delayed_node->count--;
+ delayed_node->count--;
finish_one_item(delayed_root);
}
@@ -513,7 +518,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
/*
* For insertions we track reserved metadata space by accounting
* for the number of leaves that will be used, based on the delayed
- * node's index_items_size field.
+ * node's curr_index_batch_size and index_item_leaves fields.
*/
if (item->type == BTRFS_DELAYED_DELETION_ITEM)
item->bytes_reserved = num_bytes;
@@ -1026,7 +1031,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode_item);
write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
sizeof(struct btrfs_inode_item));
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
goto out;
@@ -1153,20 +1158,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
- btrfs_release_delayed_node(curr_node);
- curr_node = NULL;
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
+ /*
+ * See the comment below about releasing path before releasing
+ * node. If the commit of delayed items was successful the path
+ * should always be released, but in case of an error, it may
+ * point to locked extent buffers (a leaf at the very least).
+ */
+ ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
}
+ /*
+ * Release the path to avoid a potential deadlock and lockdep splat when
+ * releasing the delayed node, as that requires taking the delayed node's
+ * mutex. If another task starts running delayed items before we take
+ * the mutex, it will first lock the mutex and then it may try to lock
+ * the same btree path (leaf).
+ */
+ btrfs_free_path(path);
+
if (curr_node)
btrfs_release_delayed_node(curr_node);
- btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
@@ -1361,8 +1379,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
return -ENOMEM;
async_work->delayed_root = delayed_root;
- btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
- NULL);
+ btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL);
async_work->nr = nr;
btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}
-/* Will return 0 or -ENOMEM */
+static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+
+ if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+ return;
+
+ /*
+ * Adding the new dir index item does not require touching another
+ * leaf, so we can release 1 unit of metadata that was previously
+ * reserved when starting the transaction. This applies only to
+ * the case where we had a transaction start and excludes the
+ * transaction join case (when replaying log trees).
+ */
+ trace_btrfs_space_reservation(fs_info, "transaction",
+ trans->transid, bytes, 0);
+ btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
+ ASSERT(trans->bytes_reserved >= bytes);
+ trans->bytes_reserved -= bytes;
+}
+
+/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *dir,
@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex);
+ /*
+ * First attempt to insert the delayed item. This is to make the error
+ * handling path simpler in case we fail (-EEXIST). There's no risk of
+ * any other task coming in and running the delayed item before we do
+ * the metadata space reservation below, because we are holding the
+ * delayed node's mutex and that mutex must also be locked before the
+ * node's delayed items can be run.
+ */
+ ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
+ if (unlikely(ret)) {
+ btrfs_err(trans->fs_info,
+"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
+ name_len, name, index, btrfs_root_id(delayed_node->root),
+ delayed_node->inode_id, dir->index_cnt,
+ delayed_node->index_cnt, ret);
+ btrfs_release_delayed_item(delayed_item);
+ btrfs_release_dir_index_item_space(trans);
+ mutex_unlock(&delayed_node->mutex);
+ goto release_node;
+ }
+
if (delayed_node->index_item_leaves == 0 ||
delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
delayed_node->curr_index_batch_size = data_len;
@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
* impossible.
*/
if (WARN_ON(ret)) {
- mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_item(delayed_item);
+ mutex_unlock(&delayed_node->mutex);
goto release_node;
}
delayed_node->index_item_leaves++;
- } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
- const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
-
- /*
- * Adding the new dir index item does not require touching another
- * leaf, so we can release 1 unit of metadata that was previously
- * reserved when starting the transaction. This applies only to
- * the case where we had a transaction start and excludes the
- * transaction join case (when replaying log trees).
- */
- trace_btrfs_space_reservation(fs_info, "transaction",
- trans->transid, bytes, 0);
- btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
- ASSERT(trans->bytes_reserved >= bytes);
- trans->bytes_reserved -= bytes;
- }
-
- ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
- if (unlikely(ret)) {
- btrfs_err(trans->fs_info,
- "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
- name_len, name, delayed_node->root->root_key.objectid,
- delayed_node->inode_id, ret);
- BUG();
+ } else {
+ btrfs_release_dir_index_item_space(trans);
}
mutex_unlock(&delayed_node->mutex);
@@ -1722,8 +1760,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
}
/*
- * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
- *
+ * Read dir info stored in the delayed tree.
*/
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
struct list_head *ins_list)
@@ -1736,9 +1773,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
int over = 0;
unsigned char d_type;
- if (list_empty(ins_list))
- return 0;
-
/*
* Changing the data of the delayed item is impossible. So
* we needn't lock them. And we have held i_mutex of the
@@ -1799,24 +1833,22 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_block_group(inode_item, 0);
btrfs_set_stack_timespec_sec(&inode_item->atime,
- inode->i_atime.tv_sec);
+ inode_get_atime_sec(inode));
btrfs_set_stack_timespec_nsec(&inode_item->atime,
- inode->i_atime.tv_nsec);
+ inode_get_atime_nsec(inode));
btrfs_set_stack_timespec_sec(&inode_item->mtime,
- inode->i_mtime.tv_sec);
+ inode_get_mtime_sec(inode));
btrfs_set_stack_timespec_nsec(&inode_item->mtime,
- inode->i_mtime.tv_nsec);
+ inode_get_mtime_nsec(inode));
btrfs_set_stack_timespec_sec(&inode_item->ctime,
- inode->i_ctime.tv_sec);
+ inode_get_ctime_sec(inode));
btrfs_set_stack_timespec_nsec(&inode_item->ctime,
- inode->i_ctime.tv_nsec);
+ inode_get_ctime_nsec(inode));
- btrfs_set_stack_timespec_sec(&inode_item->otime,
- BTRFS_I(inode)->i_otime.tv_sec);
- btrfs_set_stack_timespec_nsec(&inode_item->otime,
- BTRFS_I(inode)->i_otime.tv_nsec);
+ btrfs_set_stack_timespec_sec(&inode_item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_stack_timespec_nsec(&inode_item->otime, BTRFS_I(inode)->i_otime_nsec);
}
int btrfs_fill_inode(struct inode *inode, u32 *rdev)
@@ -1856,19 +1888,17 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item),
&BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
- inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
- inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
+ inode_set_atime(inode, btrfs_stack_timespec_sec(&inode_item->atime),
+ btrfs_stack_timespec_nsec(&inode_item->atime));
- inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
- inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
+ inode_set_mtime(inode, btrfs_stack_timespec_sec(&inode_item->mtime),
+ btrfs_stack_timespec_nsec(&inode_item->mtime));
- inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
- inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
+ inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime),
+ btrfs_stack_timespec_nsec(&inode_item->ctime));
- BTRFS_I(inode)->i_otime.tv_sec =
- btrfs_stack_timespec_sec(&inode_item->otime);
- BTRFS_I(inode)->i_otime.tv_nsec =
- btrfs_stack_timespec_nsec(&inode_item->otime);
+ BTRFS_I(inode)->i_otime_sec = btrfs_stack_timespec_sec(&inode_item->otime);
+ BTRFS_I(inode)->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime);
inode->i_generation = BTRFS_I(inode)->generation;
BTRFS_I(inode)->index_cnt = (u64)-1;
@@ -1879,9 +1909,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
}
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_inode *inode)
{
+ struct btrfs_root *root = inode->root;
struct btrfs_delayed_node *delayed_node;
int ret = 0;