aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c132
1 files changed, 91 insertions, 41 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 5146387b416b..f0cf8ce26f01 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid);
* and once to do all the other items.
*/
+static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root)
+{
+ unsigned int nofs_flag;
+ struct inode *inode;
+
+ /*
+ * We're holding a transaction handle whether we are logging or
+ * replaying a log tree, so we must make sure NOFS semantics apply
+ * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL
+ * to allocate an inode, which can recurse back into the filesystem and
+ * attempt a transaction commit, resulting in a deadlock.
+ */
+ nofs_flag = memalloc_nofs_save();
+ inode = btrfs_iget(objectid, root);
+ memalloc_nofs_restore(nofs_flag);
+
+ return inode;
+}
+
/*
* start a sub transaction and setup the log tree
* this increments the log tree writer count to make the people
@@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
{
struct inode *inode;
- inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ inode = btrfs_iget_logging(objectid, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@@ -1625,7 +1644,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (ret)
goto out;
}
- BTRFS_I(inode)->index_cnt = (u64)-1;
+ if (S_ISDIR(inode->i_mode))
+ BTRFS_I(inode)->index_cnt = (u64)-1;
if (inode->i_nlink == 0) {
if (S_ISDIR(inode->i_mode)) {
@@ -2820,7 +2840,7 @@ static void wait_for_writer(struct btrfs_root *root)
finish_wait(&root->log_writer_wait, &wait);
}
-void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
+void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct btrfs_inode *inode)
{
ctx->log_ret = 0;
ctx->log_transid = 0;
@@ -2839,7 +2859,7 @@ void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx)
{
- struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+ struct btrfs_inode *inode = ctx->inode;
if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
!test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
@@ -2857,7 +2877,7 @@ void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_extent *tmp;
- ASSERT(inode_is_locked(ctx->inode));
+ ASSERT(inode_is_locked(&ctx->inode->vfs_inode));
list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
list_del_init(&ordered->log_list);
@@ -4234,8 +4254,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, bool inode_item_dropped)
{
struct btrfs_inode_item *inode_item;
+ struct btrfs_key key;
int ret;
+ btrfs_get_inode_key(inode, &key);
/*
* If we are doing a fast fsync and the inode was logged before in the
* current transaction, then we know the inode was previously logged and
@@ -4247,7 +4269,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
* already exists can also result in unnecessarily splitting a leaf.
*/
if (!inode_item_dropped && inode->logged_trans == trans->transid) {
- ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1);
+ ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
ASSERT(ret <= 0);
if (ret > 0)
ret = -ENOENT;
@@ -4261,7 +4283,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
* the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime
* flags and set ->logged_trans to 0.
*/
- ret = btrfs_insert_empty_item(trans, log, path, &inode->location,
+ ret = btrfs_insert_empty_item(trans, log, path, &key,
sizeof(*inode_item));
ASSERT(ret != -EEXIST);
}
@@ -4575,6 +4597,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
{
struct btrfs_ordered_extent *ordered;
struct btrfs_root *csum_root;
+ u64 block_start;
u64 csum_offset;
u64 csum_len;
u64 mod_start = em->start;
@@ -4584,7 +4607,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
if (inode->flags & BTRFS_INODE_NODATASUM ||
(em->flags & EXTENT_FLAG_PREALLOC) ||
- em->block_start == EXTENT_MAP_HOLE)
+ em->disk_bytenr == EXTENT_MAP_HOLE)
return 0;
list_for_each_entry(ordered, &ctx->ordered_extents, log_list) {
@@ -4648,17 +4671,18 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
/* If we're compressed we have to save the entire range of csums. */
if (extent_map_is_compressed(em)) {
csum_offset = 0;
- csum_len = max(em->block_len, em->orig_block_len);
+ csum_len = em->disk_num_bytes;
} else {
csum_offset = mod_start - em->start;
csum_len = mod_len;
}
/* block start is already adjusted for the file extent offset. */
- csum_root = btrfs_csum_root(trans->fs_info, em->block_start);
- ret = btrfs_lookup_csums_list(csum_root, em->block_start + csum_offset,
- em->block_start + csum_offset +
- csum_len - 1, &ordered_sums, false);
+ block_start = extent_map_block_start(em);
+ csum_root = btrfs_csum_root(trans->fs_info, block_start);
+ ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset,
+ block_start + csum_offset + csum_len - 1,
+ &ordered_sums, false);
if (ret < 0)
return ret;
ret = 0;
@@ -4688,7 +4712,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_key key;
enum btrfs_compression_type compress_type;
- u64 extent_offset = em->start - em->orig_start;
+ u64 extent_offset = em->offset;
+ u64 block_start = extent_map_block_start(em);
u64 block_len;
int ret;
@@ -4698,14 +4723,13 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
else
btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG);
- block_len = max(em->block_len, em->orig_block_len);
+ block_len = em->disk_num_bytes;
compress_type = extent_map_compression(em);
if (compress_type != BTRFS_COMPRESS_NONE) {
- btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start);
+ btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start);
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
- } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
- btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start -
- extent_offset);
+ } else if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
+ btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start - extent_offset);
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
}
@@ -4860,18 +4884,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
path->slots[0]++;
continue;
}
- if (!dropped_extents) {
- /*
- * Avoid logging extent items logged in past fsync calls
- * and leading to duplicate keys in the log tree.
- */
+ /*
+ * Avoid overlapping items in the log tree. The first time we
+ * get here, get rid of everything from a past fsync. After
+ * that, if the current extent starts before the end of the last
+ * extent we copied, truncate the last one. This can happen if
+ * an ordered extent completion modifies the subvolume tree
+ * while btrfs_next_leaf() has the tree unlocked.
+ */
+ if (!dropped_extents || key.offset < truncate_offset) {
ret = truncate_inode_items(trans, root->log_root, inode,
- truncate_offset,
+ min(key.offset, truncate_offset),
BTRFS_EXTENT_DATA_KEY);
if (ret)
goto out;
dropped_extents = true;
}
+ truncate_offset = btrfs_file_extent_end(path);
if (ins_nr == 0)
start_slot = slot;
ins_nr++;
@@ -5433,7 +5462,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = start_inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
LIST_HEAD(dir_list);
struct btrfs_dir_list *dir_elem;
@@ -5494,7 +5522,7 @@ again:
continue;
btrfs_release_path(path);
- di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
+ di_inode = btrfs_iget_logging(di_key.objectid, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto out;
@@ -5554,7 +5582,7 @@ again:
btrfs_add_delayed_iput(curr_inode);
curr_inode = NULL;
- vfs_inode = btrfs_iget(fs_info->sb, ino, root);
+ vfs_inode = btrfs_iget_logging(ino, root);
if (IS_ERR(vfs_inode)) {
ret = PTR_ERR(vfs_inode);
break;
@@ -5649,7 +5677,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans,
if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES)
return BTRFS_LOG_FORCE_COMMIT;
- inode = btrfs_iget(root->fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was deleted in
* the current transaction then we either:
@@ -5750,7 +5778,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
/*
@@ -5781,7 +5808,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
list_del(&curr->list);
kfree(curr);
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@@ -5792,7 +5819,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (ret != -ENOENT)
break;
- inode = btrfs_iget(fs_info->sb, parent, root);
+ inode = btrfs_iget_logging(parent, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
break;
@@ -5905,7 +5932,7 @@ again:
if (ret < 0) {
return ret;
} else if (ret > 0 &&
- other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
+ other_ino != btrfs_ino(ctx->inode)) {
if (ins_nr > 0) {
ins_nr++;
} else {
@@ -6314,7 +6341,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
const bool orig_log_new_dentries = ctx->log_new_dentries;
- struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_item *item;
int ret = 0;
@@ -6340,7 +6366,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
if (key.type == BTRFS_ROOT_ITEM_KEY)
continue;
- di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root);
+ di_inode = btrfs_iget_logging(key.objectid, inode->root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
break;
@@ -6724,7 +6750,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
@@ -6789,8 +6814,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
- dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
- root);
+ dir_inode = btrfs_iget_logging(inode_key.objectid, root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -6852,7 +6876,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
while (true) {
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
int slot;
struct btrfs_key search_key;
@@ -6867,7 +6890,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -7056,6 +7079,15 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
}
/*
+ * If we're logging an inode from a subvolume created in the current
+ * transaction we must force a commit since the root is not persisted.
+ */
+ if (btrfs_root_generation(&root->root_item) == trans->transid) {
+ ret = BTRFS_LOG_FORCE_COMMIT;
+ goto end_no_trans;
+ }
+
+ /*
* Skip already logged inodes or inodes corresponding to tmpfiles
* (since logging them is pointless, a link count of 0 means they
* will never be accessible).
@@ -7436,6 +7468,24 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
}
/*
+ * Call this when creating a subvolume in a directory.
+ * Because we don't commit a transaction when creating a subvolume, we can't
+ * allow the directory pointing to the subvolume to be logged with an entry that
+ * points to an unpersisted root if we are still in the transaction used to
+ * create the subvolume, so make any attempt to log the directory to result in a
+ * full log sync.
+ * Also we don't need to worry with renames, since btrfs_rename() marks the log
+ * for full commit when renaming a subvolume.
+ */
+void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
+ struct btrfs_inode *dir)
+{
+ mutex_lock(&dir->log_mutex);
+ dir->last_unlink_trans = trans->transid;
+ mutex_unlock(&dir->log_mutex);
+}
+
+/*
* Update the log after adding a new name for an inode.
*
* @trans: Transaction handle.
@@ -7567,7 +7617,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
goto out;
}
- btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
+ btrfs_init_log_ctx(&ctx, inode);
ctx.logging_new_name = true;
btrfs_init_log_ctx_scratch_eb(&ctx);
/*