aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c871
1 files changed, 564 insertions, 307 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f396a9afa403..01423670bc8a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -42,14 +42,16 @@
#include "file-item.h"
#include "orphan.h"
#include "tree-checker.h"
+#include "raid-stripe-tree.h"
#undef SCRAMBLE_DELAYED_REFS
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
+ u64 owner_offset,
struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
@@ -57,7 +59,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
- struct btrfs_key *ins, int ref_mod);
+ struct btrfs_key *ins, int ref_mod, u64 oref_root);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
@@ -69,27 +71,6 @@ static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
return (cache->flags & bits) == bits;
}
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
- u64 start, u64 num_bytes)
-{
- u64 end = start + num_bytes - 1;
- set_extent_bit(&fs_info->excluded_extents, start, end,
- EXTENT_UPTODATE, NULL);
- return 0;
-}
-
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
-{
- struct btrfs_fs_info *fs_info = cache->fs_info;
- u64 start, end;
-
- start = cache->start;
- end = start + cache->length - 1;
-
- clear_extent_bits(&fs_info->excluded_extents, start, end,
- EXTENT_UPTODATE);
-}
-
/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
@@ -121,7 +102,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 offset, int metadata, u64 *refs, u64 *flags)
+ u64 offset, int metadata, u64 *refs, u64 *flags,
+ u64 *owning_root)
{
struct btrfs_root *extent_root;
struct btrfs_delayed_ref_head *head;
@@ -133,6 +115,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
u32 item_size;
u64 num_refs;
u64 extent_flags;
+ u64 owner = 0;
int ret;
/*
@@ -186,9 +169,13 @@ search_again:
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
+ owner = btrfs_get_extent_owner_root(fs_info, leaf,
+ path->slots[0]);
} else {
- ret = -EINVAL;
- btrfs_print_v0_err(fs_info);
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected extent item size, has %u expect >= %zu",
+ item_size, sizeof(*ei));
if (trans)
btrfs_abort_transaction(trans, ret);
else
@@ -243,6 +230,8 @@ out:
*refs = num_refs;
if (flags)
*flags = extent_flags;
+ if (owning_root)
+ *owning_root = owner;
out_free:
btrfs_free_path(path);
return ret;
@@ -363,9 +352,15 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
struct btrfs_extent_inline_ref *iref,
enum btrfs_inline_ref_type is_data)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
int type = btrfs_extent_inline_ref_type(eb, iref);
u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
+ if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
+ ASSERT(btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+ return type;
+ }
+
if (type == BTRFS_TREE_BLOCK_REF_KEY ||
type == BTRFS_SHARED_BLOCK_REF_KEY ||
type == BTRFS_SHARED_DATA_REF_KEY ||
@@ -374,26 +369,25 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
if (type == BTRFS_TREE_BLOCK_REF_KEY)
return type;
if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
- ASSERT(eb->fs_info);
+ ASSERT(fs_info);
/*
* Every shared one has parent tree block,
* which must be aligned to sector size.
*/
- if (offset &&
- IS_ALIGNED(offset, eb->fs_info->sectorsize))
+ if (offset && IS_ALIGNED(offset, fs_info->sectorsize))
return type;
}
} else if (is_data == BTRFS_REF_TYPE_DATA) {
if (type == BTRFS_EXTENT_DATA_REF_KEY)
return type;
if (type == BTRFS_SHARED_DATA_REF_KEY) {
- ASSERT(eb->fs_info);
+ ASSERT(fs_info);
/*
* Every shared one has parent tree block,
* which must be aligned to sector size.
*/
if (offset &&
- IS_ALIGNED(offset, eb->fs_info->sectorsize))
+ IS_ALIGNED(offset, fs_info->sectorsize))
return type;
}
} else {
@@ -402,11 +396,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
}
}
+ WARN_ON(1);
btrfs_print_leaf(eb);
- btrfs_err(eb->fs_info,
+ btrfs_err(fs_info,
"eb %llu iref 0x%lx invalid extent inline ref type %d",
eb->start, (unsigned long)iref, type);
- WARN_ON(1);
return BTRFS_REF_TYPE_INVALID;
}
@@ -418,11 +412,11 @@ u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
__le64 lenum;
lenum = cpu_to_le64(root_objectid);
- high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
+ high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(owner);
- low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(offset);
- low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
return ((u64)high_crc << 31) ^ (u64)low_crc;
}
@@ -594,7 +588,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
}
}
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
ret = 0;
fail:
btrfs_release_path(path);
@@ -624,12 +618,12 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
ref2 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_shared_data_ref);
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
- } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
- btrfs_print_v0_err(trans->fs_info);
- btrfs_abort_transaction(trans, -EINVAL);
- return -EINVAL;
} else {
- BUG();
+ btrfs_err(trans->fs_info,
+ "unrecognized backref key (%llu %u %llu)",
+ key.objectid, key.type, key.offset);
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ return -EUCLEAN;
}
BUG_ON(num_refs < refs_to_drop);
@@ -642,7 +636,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
}
return ret;
}
@@ -660,7 +654,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
if (iref) {
/*
* If type is invalid, we should have bailed out earlier than
@@ -809,7 +802,6 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
int type;
int want;
int ret;
- int err = 0;
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
int needed;
@@ -836,10 +828,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
again:
ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
- if (ret < 0) {
- err = ret;
+ if (ret < 0)
goto out;
- }
/*
* We may be a newly converted file system which still has the old fat
@@ -866,19 +856,26 @@ again:
}
if (ret && !insert) {
- err = -ENOENT;
+ ret = -ENOENT;
goto out;
} else if (WARN_ON(ret)) {
- err = -EIO;
+ btrfs_print_leaf(path->nodes[0]);
+ btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+ bytenr, num_bytes, parent, root_objectid, owner,
+ offset);
+ ret = -EUCLEAN;
goto out;
}
leaf = path->nodes[0];
item_size = btrfs_item_size(leaf, path->slots[0]);
if (unlikely(item_size < sizeof(*ei))) {
- err = -EINVAL;
- btrfs_print_v0_err(fs_info);
- btrfs_abort_transaction(trans, err);
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected extent item size, has %llu expect >= %zu",
+ item_size, sizeof(*ei));
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -898,22 +895,17 @@ again:
else
needed = BTRFS_REF_TYPE_BLOCK;
- err = -ENOENT;
- while (1) {
- if (ptr >= end) {
- if (ptr > end) {
- err = -EUCLEAN;
- btrfs_print_leaf(path->nodes[0]);
- btrfs_crit(fs_info,
-"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
- path->slots[0], root_objectid, owner, offset, parent);
- }
- break;
- }
+ ret = -ENOENT;
+ while (ptr < end) {
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
+ if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
+ ASSERT(btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+ ptr += btrfs_extent_inline_ref_size(type);
+ continue;
+ }
if (type == BTRFS_REF_TYPE_INVALID) {
- err = -EUCLEAN;
+ ret = -EUCLEAN;
goto out;
}
@@ -929,7 +921,7 @@ again:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
if (match_extent_data_ref(leaf, dref, root_objectid,
owner, offset)) {
- err = 0;
+ ret = 0;
break;
}
if (hash_extent_data_ref_item(leaf, dref) <
@@ -940,14 +932,14 @@ again:
ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
if (parent > 0) {
if (parent == ref_offset) {
- err = 0;
+ ret = 0;
break;
}
if (ref_offset < parent)
break;
} else {
if (root_objectid == ref_offset) {
- err = 0;
+ ret = 0;
break;
}
if (ref_offset < root_objectid)
@@ -956,10 +948,20 @@ again:
}
ptr += btrfs_extent_inline_ref_size(type);
}
- if (err == -ENOENT && insert) {
+
+ if (unlikely(ptr > end)) {
+ ret = -EUCLEAN;
+ btrfs_print_leaf(path->nodes[0]);
+ btrfs_crit(fs_info,
+"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
+ path->slots[0], root_objectid, owner, offset, parent);
+ goto out;
+ }
+
+ if (ret == -ENOENT && insert) {
if (item_size + extra_size >=
BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
- err = -EAGAIN;
+ ret = -EAGAIN;
goto out;
}
/*
@@ -971,7 +973,7 @@ again:
if (find_next_key(path, 0, &key) == 0 &&
key.objectid == bytenr &&
key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
- err = -EAGAIN;
+ ret = -EAGAIN;
goto out;
}
}
@@ -982,14 +984,14 @@ out:
path->search_for_extension = 0;
btrfs_unlock_up_safe(path, 1);
}
- return err;
+ return ret;
}
/*
* helper to add new inline back ref
*/
static noinline_for_stack
-void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
+void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
u64 parent, u64 root_objectid,
@@ -1012,7 +1014,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
type = extent_ref_type(parent, owner);
size = btrfs_extent_inline_ref_size(type);
- btrfs_extend_item(path, size);
+ btrfs_extend_item(trans, path, size);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
@@ -1046,7 +1048,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
} else {
btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
}
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
}
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
@@ -1079,13 +1081,15 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
/*
* helper to update/remove inline back ref
*/
-static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_path *path,
+static noinline_for_stack int update_inline_extent_backref(
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
struct btrfs_delayed_extent_op *extent_op)
{
struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
struct btrfs_extent_data_ref *dref = NULL;
struct btrfs_shared_data_ref *sref = NULL;
@@ -1098,18 +1102,33 @@ void update_inline_extent_backref(struct btrfs_path *path,
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
- WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+ if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
+ struct btrfs_key key;
+ u32 extent_size;
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ extent_size = fs_info->nodesize;
+ else
+ extent_size = key.offset;
+ btrfs_print_leaf(leaf);
+ btrfs_err(fs_info,
+ "invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
+ key.objectid, extent_size, refs_to_mod, refs);
+ return -EUCLEAN;
+ }
refs += refs_to_mod;
btrfs_set_extent_refs(leaf, ei, refs);
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
/*
- * If type is invalid, we should have bailed out after
- * lookup_inline_extent_backref().
+ * Function btrfs_get_extent_inline_ref_type() has already printed
+ * error messages.
*/
- type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
- ASSERT(type != BTRFS_REF_TYPE_INVALID);
+ if (unlikely(type == BTRFS_REF_TYPE_INVALID))
+ return -EUCLEAN;
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -1119,10 +1138,43 @@ void update_inline_extent_backref(struct btrfs_path *path,
refs = btrfs_shared_data_ref_count(leaf, sref);
} else {
refs = 1;
- BUG_ON(refs_to_mod != -1);
+ /*
+ * For tree blocks we can only drop one ref for it, and tree
+ * blocks should not have refs > 1.
+ *
+ * Furthermore if we're inserting a new inline backref, we
+ * won't reach this path either. That would be
+ * setup_inline_extent_backref().
+ */
+ if (unlikely(refs_to_mod != -1)) {
+ struct btrfs_key key;
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ btrfs_print_leaf(leaf);
+ btrfs_err(fs_info,
+ "invalid refs_to_mod for tree block %llu, has %d expect -1",
+ key.objectid, refs_to_mod);
+ return -EUCLEAN;
+ }
}
- BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+ if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
+ struct btrfs_key key;
+ u32 extent_size;
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ extent_size = fs_info->nodesize;
+ else
+ extent_size = key.offset;
+ btrfs_print_leaf(leaf);
+ btrfs_err(fs_info,
+"invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
+ (unsigned long)iref, key.objectid, extent_size,
+ refs_to_mod, refs);
+ return -EUCLEAN;
+ }
refs += refs_to_mod;
if (refs > 0) {
@@ -1139,9 +1191,10 @@ void update_inline_extent_backref(struct btrfs_path *path,
memmove_extent_buffer(leaf, ptr, ptr + size,
end - ptr - size);
item_size -= size;
- btrfs_truncate_item(path, item_size, 1);
+ btrfs_truncate_item(trans, path, item_size, 1);
}
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
+ return 0;
}
static noinline_for_stack
@@ -1170,9 +1223,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
bytenr, num_bytes, root_objectid, path->slots[0]);
return -EUCLEAN;
}
- update_inline_extent_backref(path, iref, refs_to_add, extent_op);
+ ret = update_inline_extent_backref(trans, path, iref,
+ refs_to_add, extent_op);
} else if (ret == -ENOENT) {
- setup_inline_extent_backref(trans->fs_info, path, iref, parent,
+ setup_inline_extent_backref(trans, path, iref, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
ret = 0;
@@ -1190,7 +1244,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
BUG_ON(!is_data && refs_to_drop != 1);
if (iref)
- update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+ ret = update_inline_extent_backref(trans, path, iref,
+ -refs_to_drop, NULL);
else if (is_data)
ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
else
@@ -1386,7 +1441,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
generic_ref->action);
BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
- generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
+ generic_ref->tree_ref.ref_root == BTRFS_TREE_LOG_OBJECTID);
if (generic_ref->type == BTRFS_REF_METADATA)
ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
@@ -1399,7 +1454,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
}
/*
- * __btrfs_inc_extent_ref - insert backreference for a given extent
+ * Insert backreference for a given extent.
*
* The counterpart is in __btrfs_free_extent(), with examples and more details
* how it works.
@@ -1429,8 +1484,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
* always passed as 0. For data extents it is the fileoffset
* this extent belongs to.
*
- * @refs_to_add Number of references to add
- *
* @extent_op Pointer to a structure, holding information necessary when
* updating a tree block's flags
*
@@ -1438,7 +1491,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int refs_to_add,
+ u64 owner, u64 offset,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_path *path;
@@ -1448,6 +1501,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 bytenr = node->bytenr;
u64 num_bytes = node->num_bytes;
u64 refs;
+ int refs_to_add = node->ref_mod;
int ret;
path = btrfs_alloc_path();
@@ -1474,19 +1528,18 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, item);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
btrfs_release_path(path);
/* now insert the actual backref */
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- BUG_ON(refs_to_add != 1);
+ if (owner < BTRFS_FIRST_FREE_OBJECTID)
ret = insert_tree_block_ref(trans, path, bytenr, parent,
root_objectid);
- } else {
+ else
ret = insert_extent_data_ref(trans, path, bytenr, parent,
root_objectid, owner, offset,
refs_to_add);
- }
+
if (ret)
btrfs_abort_transaction(trans, ret);
out:
@@ -1494,45 +1547,72 @@ out:
return ret;
}
+static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_head *href)
+{
+ u64 root = href->owning_root;
+
+ /*
+ * Don't check must_insert_reserved, as this is called from contexts
+ * where it has already been unset.
+ */
+ if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE ||
+ !href->is_data || !is_fstree(root))
+ return;
+
+ btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes,
+ BTRFS_QGROUP_RSV_DATA);
+}
+
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
bool insert_reserved)
{
int ret = 0;
struct btrfs_delayed_data_ref *ref;
- struct btrfs_key ins;
u64 parent = 0;
- u64 ref_root = 0;
u64 flags = 0;
- ins.objectid = node->bytenr;
- ins.offset = node->num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
-
ref = btrfs_delayed_node_to_data_ref(node);
trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_DATA_REF_KEY)
parent = ref->parent;
- ref_root = ref->root;
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+ struct btrfs_key key;
+ struct btrfs_squota_delta delta = {
+ .root = href->owning_root,
+ .num_bytes = node->num_bytes,
+ .is_data = true,
+ .is_inc = true,
+ .generation = trans->transid,
+ };
+
if (extent_op)
flags |= extent_op->flags_to_set;
- ret = alloc_reserved_file_extent(trans, parent, ref_root,
+
+ key.objectid = node->bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = node->num_bytes;
+
+ ret = alloc_reserved_file_extent(trans, parent, ref->root,
flags, ref->objectid,
- ref->offset, &ins,
- node->ref_mod);
+ ref->offset, &key,
+ node->ref_mod, href->owning_root);
+ free_head_ref_squota_rsv(trans->fs_info, href);
+ if (!ret)
+ ret = btrfs_record_squota_delta(trans->fs_info, &delta);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
+ ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root,
ref->objectid, ref->offset,
- node->ref_mod, extent_op);
+ extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, node, parent,
- ref_root, ref->objectid,
- ref->offset, node->ref_mod,
- extent_op);
+ ret = __btrfs_free_extent(trans, href, node, parent,
+ ref->root, ref->objectid,
+ ref->offset, extent_op);
} else {
BUG();
}
@@ -1569,7 +1649,6 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
u32 item_size;
int ret;
- int err = 0;
int metadata = 1;
if (TRANS_ABORTED(trans))
@@ -1596,10 +1675,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
again:
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
- err = ret;
goto out;
- }
- if (ret > 0) {
+ } else if (ret > 0) {
if (metadata) {
if (path->slots[0] > 0) {
path->slots[0]--;
@@ -1620,7 +1697,10 @@ again:
goto again;
}
} else {
- err = -EIO;
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "missing extent item for extent %llu num_bytes %llu level %d",
+ head->bytenr, head->num_bytes, extent_op->level);
goto out;
}
}
@@ -1629,27 +1709,31 @@ again:
item_size = btrfs_item_size(leaf, path->slots[0]);
if (unlikely(item_size < sizeof(*ei))) {
- err = -EINVAL;
- btrfs_print_v0_err(fs_info);
- btrfs_abort_transaction(trans, err);
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected extent item size, has %u expect >= %zu",
+ item_size, sizeof(*ei));
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
__run_delayed_extent_op(extent_op, leaf, ei);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
out:
btrfs_free_path(path);
- return err;
+ return ret;
}
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
bool insert_reserved)
{
int ret = 0;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_tree_ref *ref;
u64 parent = 0;
u64 ref_root = 0;
@@ -1661,22 +1745,32 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent = ref->parent;
ref_root = ref->root;
- if (node->ref_mod != 1) {
+ if (unlikely(node->ref_mod != 1)) {
btrfs_err(trans->fs_info,
- "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+ "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
node->bytenr, node->ref_mod, node->action, ref_root,
parent);
- return -EIO;
+ return -EUCLEAN;
}
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+ struct btrfs_squota_delta delta = {
+ .root = href->owning_root,
+ .num_bytes = fs_info->nodesize,
+ .is_data = false,
+ .is_inc = true,
+ .generation = trans->transid,
+ };
+
BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, node, extent_op);
+ if (!ret)
+ btrfs_record_squota_delta(fs_info, &delta);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
- ref->level, 0, 1, extent_op);
+ ref->level, 0, extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, node, parent, ref_root,
- ref->level, 0, 1, extent_op);
+ ret = __btrfs_free_extent(trans, href, node, parent, ref_root,
+ ref->level, 0, extent_op);
} else {
BUG();
}
@@ -1685,6 +1779,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
bool insert_reserved)
@@ -1692,19 +1787,23 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
int ret = 0;
if (TRANS_ABORTED(trans)) {
- if (insert_reserved)
+ if (insert_reserved) {
btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
+ free_head_ref_squota_rsv(trans->fs_info, href);
+ }
return 0;
}
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
node->type == BTRFS_SHARED_BLOCK_REF_KEY)
- ret = run_delayed_tree_ref(trans, node, extent_op,
+ ret = run_delayed_tree_ref(trans, href, node, extent_op,
insert_reserved);
else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY)
- ret = run_delayed_data_ref(trans, node, extent_op,
+ ret = run_delayed_data_ref(trans, href, node, extent_op,
insert_reserved);
+ else if (node->type == BTRFS_EXTENT_OWNER_REF_KEY)
+ ret = 0;
else
BUG();
if (ret && insert_reserved)
@@ -1783,28 +1882,38 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
return ret ? ret : 1;
}
-void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
- int nr_items = 1; /* Dropping this ref head update. */
+ u64 ret = 0;
/*
* We had csum deletions accounted for in our delayed refs rsv, we need
* to drop the csum leaves for this update from our delayed_refs_rsv.
*/
if (head->total_ref_mod < 0 && head->is_data) {
+ int nr_csums;
+
spin_lock(&delayed_refs->lock);
delayed_refs->pending_csums -= head->num_bytes;
spin_unlock(&delayed_refs->lock);
- nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
+ nr_csums = btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
+
+ btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums);
+
+ ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
}
+ /* must_insert_reserved can be set only if we didn't run the head ref. */
+ if (head->must_insert_reserved)
+ free_head_ref_squota_rsv(fs_info, head);
- btrfs_delayed_refs_rsv_release(fs_info, nr_items);
+ return ret;
}
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head)
+ struct btrfs_delayed_ref_head *head,
+ u64 *bytes_released)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1849,7 +1958,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
}
}
- btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
+ *bytes_released += btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
trace_run_delayed_ref_head(fs_info, head, 0);
btrfs_delayed_ref_unlock(head);
@@ -1891,7 +2000,8 @@ static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
}
static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *locked_ref)
+ struct btrfs_delayed_ref_head *locked_ref,
+ u64 *bytes_released)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -1939,14 +2049,22 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
* spin lock.
*/
must_insert_reserved = locked_ref->must_insert_reserved;
+ /*
+ * Unsetting this on the head ref relinquishes ownership of
+ * the rsv_bytes, so it is critical that every possible code
+ * path from here forward frees all reserves including qgroup
+ * reserve.
+ */
locked_ref->must_insert_reserved = false;
extent_op = locked_ref->extent_op;
locked_ref->extent_op = NULL;
spin_unlock(&locked_ref->lock);
- ret = run_one_delayed_ref(trans, ref, extent_op,
+ ret = run_one_delayed_ref(trans, locked_ref, ref, extent_op,
must_insert_reserved);
+ btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
+ *bytes_released += btrfs_calc_delayed_ref_bytes(fs_info, 1);
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
@@ -1970,15 +2088,22 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
* Returns -ENOMEM or -EIO on failure and will abort the transaction.
*/
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
- unsigned long nr)
+ u64 min_bytes)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_head *locked_ref = NULL;
int ret;
unsigned long count = 0;
+ unsigned long max_count = 0;
+ u64 bytes_processed = 0;
delayed_refs = &trans->transaction->delayed_refs;
+ if (min_bytes == 0) {
+ max_count = delayed_refs->num_heads_ready;
+ min_bytes = U64_MAX;
+ }
+
do {
if (!locked_ref) {
locked_ref = btrfs_obtain_ref_head(trans);
@@ -2006,7 +2131,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
spin_lock(&locked_ref->lock);
btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
- ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
+ ret = btrfs_run_delayed_refs_for_head(trans, locked_ref, &bytes_processed);
if (ret < 0 && ret != -EAGAIN) {
/*
* Error, btrfs_run_delayed_refs_for_head already
@@ -2018,7 +2143,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* Success, perform the usual cleanup of a processed
* head
*/
- ret = cleanup_ref_head(trans, locked_ref);
+ ret = cleanup_ref_head(trans, locked_ref, &bytes_processed);
if (ret > 0 ) {
/* We dropped our lock, we need to loop. */
ret = 0;
@@ -2035,7 +2160,9 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
locked_ref = NULL;
cond_resched();
- } while ((nr != -1 && count < nr) || locked_ref);
+ } while ((min_bytes != U64_MAX && bytes_processed < min_bytes) ||
+ (max_count > 0 && count < max_count) ||
+ locked_ref);
return 0;
}
@@ -2084,24 +2211,25 @@ static u64 find_middle(struct rb_root *root)
#endif
/*
- * this starts processing the delayed reference count updates and
- * extent insertions we have queued up so far. count can be
- * 0, which means to process everything in the tree at the start
- * of the run (but not newly added entries), or it can be some target
- * number you'd like to process.
+ * Start processing the delayed reference count updates and extent insertions
+ * we have queued up so far.
+ *
+ * @trans: Transaction handle.
+ * @min_bytes: How many bytes of delayed references to process. After this
+ * many bytes we stop processing delayed references if there are
+ * any more. If 0 it means to run all existing delayed references,
+ * but not new ones added after running all existing ones.
+ * Use (u64)-1 (U64_MAX) to run all existing delayed references
+ * plus any new ones that are added.
*
* Returns 0 on success or if called with an aborted transaction
* Returns <0 on error and aborts the transaction
*/
-int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
- unsigned long count)
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, u64 min_bytes)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_head *head;
int ret;
- int run_all = count == (unsigned long)-1;
/* We'll clean this up in btrfs_cleanup_transaction */
if (TRANS_ABORTED(trans))
@@ -2111,42 +2239,30 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
return 0;
delayed_refs = &trans->transaction->delayed_refs;
- if (count == 0)
- count = delayed_refs->num_heads_ready;
-
again:
#ifdef SCRAMBLE_DELAYED_REFS
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
- ret = __btrfs_run_delayed_refs(trans, count);
+ ret = __btrfs_run_delayed_refs(trans, min_bytes);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
return ret;
}
- if (run_all) {
+ if (min_bytes == U64_MAX) {
btrfs_create_pending_block_groups(trans);
spin_lock(&delayed_refs->lock);
- node = rb_first_cached(&delayed_refs->href_root);
- if (!node) {
+ if (RB_EMPTY_ROOT(&delayed_refs->href_root.rb_root)) {
spin_unlock(&delayed_refs->lock);
- goto out;
+ return 0;
}
- head = rb_entry(node, struct btrfs_delayed_ref_head,
- href_node);
- refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
- /* Mutex was contended, block until it's released and retry. */
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
-
- btrfs_put_delayed_ref_head(head);
cond_resched();
goto again;
}
-out:
+
return 0;
}
@@ -2271,6 +2387,7 @@ static noinline int check_committed_ref(struct btrfs_root *root,
struct btrfs_extent_item *ei;
struct btrfs_key key;
u32 item_size;
+ u32 expected_size;
int type;
int ret;
@@ -2297,10 +2414,22 @@ static noinline int check_committed_ref(struct btrfs_root *root,
ret = 1;
item_size = btrfs_item_size(leaf, path->slots[0]);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+ expected_size = sizeof(*ei) + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY);
+
+ /* No inline refs; we need to bail before checking for owner ref. */
+ if (item_size == sizeof(*ei))
+ goto out;
+
+ /* Check for an owner ref; skip over it to the real inline refs. */
+ iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
+ if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA) && type == BTRFS_EXTENT_OWNER_REF_KEY) {
+ expected_size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
+ iref = (struct btrfs_extent_inline_ref *)(iref + 1);
+ }
/* If extent item has more than 1 inline ref then it's shared */
- if (item_size != sizeof(*ei) +
- btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
+ if (item_size != expected_size)
goto out;
/*
@@ -2312,8 +2441,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
btrfs_root_last_snapshot(&root->root_item)))
goto out;
- iref = (struct btrfs_extent_inline_ref *)(ei + 1);
-
/* If this extent has SHARED_DATA_REF then it's shared */
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
if (type != BTRFS_EXTENT_DATA_REF_KEY)
@@ -2410,7 +2537,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
key.offset -= btrfs_file_extent_offset(buf, fi);
btrfs_init_generic_ref(&generic_ref, action, bytenr,
- num_bytes, parent);
+ num_bytes, parent, ref_root);
btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
key.offset, root->root_key.objectid,
for_reloc);
@@ -2423,8 +2550,9 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = fs_info->nodesize;
+ /* We don't know the owning_root, use 0. */
btrfs_init_generic_ref(&generic_ref, action, bytenr,
- num_bytes, parent);
+ num_bytes, parent, 0);
btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
root->root_key.objectid, for_reloc);
if (inc)
@@ -2525,16 +2653,13 @@ int btrfs_pin_extent(struct btrfs_trans_handle *trans,
return 0;
}
-/*
- * this function must be called within transaction
- */
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes)
+ const struct extent_buffer *eb)
{
struct btrfs_block_group *cache;
int ret;
- cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
+ cache = btrfs_lookup_block_group(trans->fs_info, eb->start);
if (!cache)
return -EINVAL;
@@ -2546,10 +2671,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- pin_down_extent(trans, cache, bytenr, num_bytes, 0);
+ pin_down_extent(trans, cache, eb->start, eb->len, 0);
/* remove us from the free space cache (if we're there at all) */
- ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
+ ret = btrfs_remove_free_space(cache, eb->start, eb->len);
out:
btrfs_put_block_group(cache);
return ret;
@@ -2751,9 +2876,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
struct extent_state *cached_state = NULL;
mutex_lock(&fs_info->unused_bg_unpin_mutex);
- ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, &cached_state);
- if (ret) {
+ if (!find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY, &cached_state)) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
@@ -2805,12 +2929,61 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
return 0;
}
+/*
+ * Parse an extent item's inline extents looking for a simple quotas owner ref.
+ *
+ * @fs_info: the btrfs_fs_info for this mount
+ * @leaf: a leaf in the extent tree containing the extent item
+ * @slot: the slot in the leaf where the extent item is found
+ *
+ * Returns the objectid of the root that originally allocated the extent item
+ * if the inline owner ref is expected and present, otherwise 0.
+ *
+ * If an extent item has an owner ref item, it will be the first inline ref
+ * item. Therefore the logic is to check whether there are any inline ref
+ * items, then check the type of the first one.
+ */
+u64 btrfs_get_extent_owner_root(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf, int slot)
+{
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ struct btrfs_extent_owner_ref *oref;
+ unsigned long ptr;
+ unsigned long end;
+ int type;
+
+ if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA))
+ return 0;
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ ptr = (unsigned long)(ei + 1);
+ end = (unsigned long)ei + btrfs_item_size(leaf, slot);
+
+ /* No inline ref items of any kind, can't check type. */
+ if (ptr == end)
+ return 0;
+
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
+
+ /* We found an owner ref, get the root out of it. */
+ if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
+ oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
+ return btrfs_extent_owner_ref_root_id(leaf, oref);
+ }
+
+ /* We have inline refs, but not an owner ref. */
+ return 0;
+}
+
static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, bool is_data)
+ u64 bytenr, struct btrfs_squota_delta *delta)
{
int ret;
+ u64 num_bytes = delta->num_bytes;
- if (is_data) {
+ if (delta->is_data) {
struct btrfs_root *csum_root;
csum_root = btrfs_csum_root(trans->fs_info, bytenr);
@@ -2819,6 +2992,18 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
return ret;
}
+
+ ret = btrfs_delete_raid_extent(trans, bytenr, num_bytes);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+ }
+
+ ret = btrfs_record_squota_delta(trans->fs_info, delta);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
}
ret = add_to_free_space_tree(trans, bytenr, num_bytes);
@@ -2901,9 +3086,10 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
* And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
*/
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
+ u64 owner_offset,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_fs_info *info = trans->fs_info;
@@ -2918,11 +3104,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
int extent_slot = 0;
int found_extent = 0;
int num_to_del = 1;
+ int refs_to_drop = node->ref_mod;
u32 item_size;
u64 refs;
u64 bytenr = node->bytenr;
u64 num_bytes = node->num_bytes;
bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
+ u64 delayed_ref_root = href->owning_root;
extent_root = btrfs_extent_root(info, bytenr);
ASSERT(extent_root);
@@ -3059,8 +3247,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
item_size = btrfs_item_size(leaf, extent_slot);
if (unlikely(item_size < sizeof(*ei))) {
- ret = -EINVAL;
- btrfs_print_v0_err(info);
+ ret = -EUCLEAN;
+ btrfs_err(trans->fs_info,
+ "unexpected extent item size, has %u expect >= %zu",
+ item_size, sizeof(*ei));
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3110,7 +3300,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
} else {
btrfs_set_extent_refs(leaf, ei, refs);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
}
if (found_extent) {
ret = remove_extent_backref(trans, extent_root, path,
@@ -3121,6 +3311,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
} else {
+ struct btrfs_squota_delta delta = {
+ .root = delayed_ref_root,
+ .num_bytes = num_bytes,
+ .is_data = is_data,
+ .is_inc = false,
+ .generation = btrfs_extent_generation(leaf, ei),
+ };
+
/* In this branch refs == 1 */
if (found_extent) {
if (is_data && refs_to_drop !=
@@ -3159,6 +3357,16 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
num_to_del = 2;
}
}
+ /*
+ * We can't infer the data owner from the delayed ref, so we need
+ * to try to get it from the owning ref item.
+ *
+ * If it is not present, then that extent was not written under
+ * simple quotas mode, so we don't need to account for its deletion.
+ */
+ if (is_data)
+ delta.root = btrfs_get_extent_owner_root(trans->fs_info,
+ leaf, extent_slot);
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
@@ -3168,7 +3376,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
+ ret = do_free_extent_accounting(trans, bytenr, &delta);
}
btrfs_release_path(path);
@@ -3242,7 +3450,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
int ret;
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
- buf->start, buf->len, parent);
+ buf->start, buf->len, parent, btrfs_header_owner(buf));
btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
root_id, 0, false);
@@ -3329,10 +3537,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
* tree, just update pinning info and exit early.
*/
if ((ref->type == BTRFS_REF_METADATA &&
- ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
+ ref->tree_ref.ref_root == BTRFS_TREE_LOG_OBJECTID) ||
(ref->type == BTRFS_REF_DATA &&
- ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
- /* unlocks the pinned mutex */
+ ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
ret = 0;
} else if (ref->type == BTRFS_REF_METADATA) {
@@ -3342,20 +3549,47 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
}
if (!((ref->type == BTRFS_REF_METADATA &&
- ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
+ ref->tree_ref.ref_root == BTRFS_TREE_LOG_OBJECTID) ||
(ref->type == BTRFS_REF_DATA &&
- ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
+ ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
btrfs_ref_tree_mod(fs_info, ref);
return ret;
}
enum btrfs_loop_type {
+ /*
+ * Start caching block groups but do not wait for progress or for them
+ * to be done.
+ */
LOOP_CACHING_NOWAIT,
+
+ /*
+ * Wait for the block group free_space >= the space we're waiting for if
+ * the block group isn't cached.
+ */
LOOP_CACHING_WAIT,
+
+ /*
+ * Allow allocations to happen from block groups that do not yet have a
+ * size classification.
+ */
LOOP_UNSET_SIZE_CLASS,
+
+ /*
+ * Allocate a chunk and then retry the allocation.
+ */
LOOP_ALLOC_CHUNK,
+
+ /*
+ * Ignore the size class restrictions for this allocation.
+ */
LOOP_WRONG_SIZE_CLASS,
+
+ /*
+ * Ignore the empty size, only try to allocate the number of bytes
+ * needed for this allocation.
+ */
LOOP_NO_EMPTY_SIZE,
};
@@ -3427,7 +3661,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
* Helper function for find_free_extent().
*
* Return -ENOENT to inform caller that we need fallback to unclustered mode.
- * Return -EAGAIN to inform caller that we need to re-search this block group
* Return >0 to inform caller that we find nothing
* Return 0 means we have found a location and set ffe_ctl->found_offset.
*/
@@ -3508,14 +3741,6 @@ refill_cluster:
trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
return 0;
}
- } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
- !ffe_ctl->retry_clustered) {
- spin_unlock(&last_ptr->refill_lock);
-
- ffe_ctl->retry_clustered = true;
- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
- ffe_ctl->empty_cluster + ffe_ctl->empty_size);
- return -EAGAIN;
}
/*
* At this point we either didn't find a cluster or we weren't able to
@@ -3530,7 +3755,6 @@ refill_cluster:
/*
* Return >0 to inform caller that we find nothing
* Return 0 when we found an free extent and set ffe_ctrl->found_offset
- * Return -EAGAIN to inform caller that we need to re-search this block group
*/
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
struct find_free_extent_ctl *ffe_ctl)
@@ -3568,25 +3792,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
ffe_ctl->num_bytes, ffe_ctl->empty_size,
&ffe_ctl->max_extent_size);
-
- /*
- * If we didn't find a chunk, and we haven't failed on this block group
- * before, and this block group is in the middle of caching and we are
- * ok with waiting, then go ahead and wait for progress to be made, and
- * set @retry_unclustered to true.
- *
- * If @retry_unclustered is true then we've already waited on this
- * block group once and should move on to the next block group.
- */
- if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
- ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
- ffe_ctl->empty_size);
- ffe_ctl->retry_unclustered = true;
- return -EAGAIN;
- } else if (!offset) {
+ if (!offset)
return 1;
- }
ffe_ctl->found_offset = offset;
return 0;
}
@@ -3600,7 +3807,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
/* We want to try and use the cluster allocator, so lets look there */
if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
- if (ret >= 0 || ret == -EAGAIN)
+ if (ret >= 0)
return ret;
/* ret == -ENOENT case falls through */
}
@@ -3685,7 +3892,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
}
spin_unlock(&block_group->lock);
- if (!ret && !btrfs_zone_activate(block_group)) {
+ /* Metadata block group is activated at write time. */
+ if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
+ !btrfs_zone_activate(block_group)) {
ret = 1;
/*
* May need to clear fs_info->{treelog,data_reloc}_bg.
@@ -3709,7 +3918,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
fs_info->data_reloc_bg == 0);
if (block_group->ro ||
- test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+ (!ffe_ctl->for_data_reloc &&
+ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
ret = 1;
goto out;
}
@@ -3752,8 +3962,26 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
fs_info->treelog_bg = block_group->start;
- if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
- fs_info->data_reloc_bg = block_group->start;
+ if (ffe_ctl->for_data_reloc) {
+ if (!fs_info->data_reloc_bg)
+ fs_info->data_reloc_bg = block_group->start;
+ /*
+ * Do not allow allocations from this block group, unless it is
+ * for data relocation. Compared to increasing the ->ro, setting
+ * the ->zoned_data_reloc_ongoing flag still allows nocow
+ * writers to come in. See btrfs_inc_nocow_writers().
+ *
+ * We need to disable an allocation to avoid an allocation of
+ * regular (non-relocation data) extent. With mix of relocation
+ * extents and regular extents, we can dispatch WRITE commands
+ * (for relocation extents) and ZONE APPEND commands (for
+ * regular extents) at the same time to the same zone, which
+ * easily break the write pointer.
+ *
+ * Also, this flag avoids this block group to be zone finished.
+ */
+ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+ }
ffe_ctl->found_offset = start + block_group->alloc_offset;
block_group->alloc_offset += num_bytes;
@@ -3771,24 +3999,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
out:
if (ret && ffe_ctl->for_treelog)
fs_info->treelog_bg = 0;
- if (ret && ffe_ctl->for_data_reloc &&
- fs_info->data_reloc_bg == block_group->start) {
- /*
- * Do not allow further allocations from this block group.
- * Compared to increasing the ->ro, setting the
- * ->zoned_data_reloc_ongoing flag still allows nocow
- * writers to come in. See btrfs_inc_nocow_writers().
- *
- * We need to disable an allocation to avoid an allocation of
- * regular (non-relocation data) extent. With mix of relocation
- * extents and regular extents, we can dispatch WRITE commands
- * (for relocation extents) and ZONE APPEND commands (for
- * regular extents) at the same time to the same zone, which
- * easily break the write pointer.
- */
- set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+ if (ret && ffe_ctl->for_data_reloc)
fs_info->data_reloc_bg = 0;
- }
spin_unlock(&fs_info->relocation_bg_lock);
spin_unlock(&fs_info->treelog_bg_lock);
spin_unlock(&block_group->lock);
@@ -3816,8 +4028,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
{
switch (ffe_ctl->policy) {
case BTRFS_EXTENT_ALLOC_CLUSTERED:
- ffe_ctl->retry_clustered = false;
- ffe_ctl->retry_unclustered = false;
+ ffe_ctl->retry_uncached = false;
break;
case BTRFS_EXTENT_ALLOC_ZONED:
/* Nothing to do */
@@ -3861,6 +4072,10 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
+ /* Block group's activeness is not a requirement for METADATA block groups. */
+ if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
+ return 0;
+
/* If we can activate new zone, just allocate a chunk and use it */
if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
return 0;
@@ -3949,15 +4164,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
return 1;
- /*
- * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
- * caching kthreads as we move along
- * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
- * LOOP_UNSET_SIZE_CLASS, allow unset size class
- * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
- * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
- * again
- */
+ /* See the comments for btrfs_loop_type for an explanation of the phases. */
if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
ffe_ctl->index = 0;
/*
@@ -4168,9 +4375,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
ffe_ctl->orig_have_caching_bg = false;
ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
ffe_ctl->loop = 0;
- /* For clustered allocation */
- ffe_ctl->retry_clustered = false;
- ffe_ctl->retry_unclustered = false;
+ ffe_ctl->retry_uncached = false;
ffe_ctl->cached = 0;
ffe_ctl->max_extent_size = 0;
ffe_ctl->total_free_space = 0;
@@ -4321,16 +4526,12 @@ have_block_group:
bg_ret = NULL;
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
- if (ret == 0) {
- if (bg_ret && bg_ret != block_group) {
- btrfs_release_block_group(block_group,
- ffe_ctl->delalloc);
- block_group = bg_ret;
- }
- } else if (ret == -EAGAIN) {
- goto have_block_group;
- } else if (ret > 0) {
+ if (ret > 0)
goto loop;
+
+ if (bg_ret && bg_ret != block_group) {
+ btrfs_release_block_group(block_group, ffe_ctl->delalloc);
+ block_group = bg_ret;
}
/* Checks */
@@ -4371,6 +4572,15 @@ have_block_group:
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
break;
loop:
+ if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
+ !ffe_ctl->retry_uncached) {
+ ffe_ctl->retry_uncached = true;
+ btrfs_wait_block_group_cache_progress(block_group,
+ ffe_ctl->num_bytes +
+ ffe_ctl->empty_cluster +
+ ffe_ctl->empty_size);
+ goto have_block_group;
+ }
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
cond_resched();
}
@@ -4398,8 +4608,8 @@ loop:
}
/*
- * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
- * hole that is at least as big as @num_bytes.
+ * Entry point to the extent allocator. Tries to find a hole that is at least
+ * as big as @num_bytes.
*
* @root - The root that will contain this extent
*
@@ -4518,20 +4728,20 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
return 0;
}
-int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
- u64 len)
+int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans,
+ const struct extent_buffer *eb)
{
struct btrfs_block_group *cache;
int ret = 0;
- cache = btrfs_lookup_block_group(trans->fs_info, start);
+ cache = btrfs_lookup_block_group(trans->fs_info, eb->start);
if (!cache) {
btrfs_err(trans->fs_info, "unable to find block group for %llu",
- start);
+ eb->start);
return -ENOSPC;
}
- ret = pin_down_extent(trans, cache, start, len, 1);
+ ret = pin_down_extent(trans, cache, eb->start, eb->len, 1);
btrfs_put_block_group(cache);
return ret;
}
@@ -4561,24 +4771,29 @@ static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
- struct btrfs_key *ins, int ref_mod)
+ struct btrfs_key *ins, int ref_mod, u64 oref_root)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *extent_root;
int ret;
struct btrfs_extent_item *extent_item;
+ struct btrfs_extent_owner_ref *oref;
struct btrfs_extent_inline_ref *iref;
struct btrfs_path *path;
struct extent_buffer *leaf;
int type;
u32 size;
+ const bool simple_quota = (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE);
if (parent > 0)
type = BTRFS_SHARED_DATA_REF_KEY;
else
type = BTRFS_EXTENT_DATA_REF_KEY;
- size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
+ size = sizeof(*extent_item);
+ if (simple_quota)
+ size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
+ size += btrfs_extent_inline_ref_size(type);
path = btrfs_alloc_path();
if (!path)
@@ -4600,7 +4815,14 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
flags | BTRFS_EXTENT_FLAG_DATA);
iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+ if (simple_quota) {
+ btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_EXTENT_OWNER_REF_KEY);
+ oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
+ btrfs_set_extent_owner_ref_root_id(leaf, oref, oref_root);
+ iref = (struct btrfs_extent_inline_ref *)(oref + 1);
+ }
btrfs_set_extent_inline_ref_type(leaf, iref, type);
+
if (parent > 0) {
struct btrfs_shared_data_ref *ref;
ref = (struct btrfs_shared_data_ref *)(iref + 1);
@@ -4615,7 +4837,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
}
- btrfs_mark_buffer_dirty(path->nodes[0]);
+ btrfs_mark_buffer_dirty(trans, path->nodes[0]);
btrfs_free_path(path);
return alloc_reserved_extent(trans, ins->objectid, ins->offset);
@@ -4690,7 +4912,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
}
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
btrfs_free_path(path);
return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
@@ -4702,12 +4924,17 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_key *ins)
{
struct btrfs_ref generic_ref = { 0 };
+ u64 root_objectid = root->root_key.objectid;
+ u64 owning_root = root_objectid;
+
+ BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
- BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
+ if (btrfs_is_data_reloc_root(root) && is_fstree(root->relocation_src_root))
+ owning_root = root->relocation_src_root;
btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
- ins->objectid, ins->offset, 0);
- btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
+ ins->objectid, ins->offset, 0, owning_root);
+ btrfs_init_data_ref(&generic_ref, root_objectid, owner,
offset, 0, false);
btrfs_ref_tree_mod(root->fs_info, &generic_ref);
@@ -4727,6 +4954,13 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
+ struct btrfs_squota_delta delta = {
+ .root = root_objectid,
+ .num_bytes = ins->offset,
+ .generation = trans->transid,
+ .is_data = true,
+ .is_inc = true,
+ };
/*
* Mixed block groups will exclude before processing the log so we only
@@ -4752,13 +4986,36 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
spin_unlock(&space_info->lock);
ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
- offset, ins, 1);
+ offset, ins, 1, root_objectid);
if (ret)
btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
+ ret = btrfs_record_squota_delta(fs_info, &delta);
btrfs_put_block_group(block_group);
return ret;
}
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Extra safety check in case the extent tree is corrupted and extent allocator
+ * chooses to use a tree block which is already used and locked.
+ */
+static bool check_eb_lock_owner(const struct extent_buffer *eb)
+{
+ if (eb->lock_owner == current->pid) {
+ btrfs_err_rl(eb->fs_info,
+"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
+ eb->start, btrfs_header_owner(eb), current->pid);
+ return true;
+ }
+ return false;
+}
+#else
+static bool check_eb_lock_owner(struct extent_buffer *eb)
+{
+ return false;
+}
+#endif
+
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, int level, u64 owner,
@@ -4772,15 +5029,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (IS_ERR(buf))
return buf;
- /*
- * Extra safety check in case the extent tree is corrupted and extent
- * allocator chooses to use a tree block which is already used and
- * locked.
- */
- if (buf->lock_owner == current->pid) {
- btrfs_err_rl(fs_info,
-"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
- buf->start, btrfs_header_owner(buf), current->pid);
+ if (check_eb_lock_owner(buf)) {
free_extent_buffer(buf);
return ERR_PTR(-EUCLEAN);
}
@@ -4857,6 +5106,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
const struct btrfs_disk_key *key,
int level, u64 hint,
u64 empty_size,
+ u64 reloc_src_root,
enum btrfs_lock_nesting nest)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4869,6 +5119,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
int ret;
u32 blocksize = fs_info->nodesize;
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
+ u64 owning_root;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (btrfs_is_testing(fs_info)) {
@@ -4895,11 +5146,13 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
ret = PTR_ERR(buf);
goto out_free_reserved;
}
+ owning_root = btrfs_header_owner(buf);
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
parent = ins.objectid;
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ owning_root = reloc_src_root;
} else
BUG_ON(parent > 0);
@@ -4919,7 +5172,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op->level = level;
btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
- ins.objectid, ins.offset, parent);
+ ins.objectid, ins.offset, parent, owning_root);
btrfs_init_tree_ref(&generic_ref, level, root_objectid,
root->root_key.objectid, false);
btrfs_ref_tree_mod(fs_info, &generic_ref);
@@ -5007,7 +5260,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
/* We don't lock the tree block, it's OK to be racy here */
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
wc->level - 1, 1, &refs,
- &flags);
+ &flags, NULL);
/* We don't care about errors in readahead. */
if (ret < 0)
continue;
@@ -5074,7 +5327,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_extent_info(trans, fs_info,
eb->start, level, 1,
&wc->refs[level],
- &wc->flags[level]);
+ &wc->flags[level],
+ NULL);
BUG_ON(ret == -ENOMEM);
if (ret)
return ret;
@@ -5164,6 +5418,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
u64 bytenr;
u64 generation;
u64 parent;
+ u64 owner_root = 0;
struct btrfs_tree_parent_check check = { 0 };
struct btrfs_key key;
struct btrfs_ref ref = { 0 };
@@ -5207,7 +5462,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
&wc->refs[level - 1],
- &wc->flags[level - 1]);
+ &wc->flags[level - 1],
+ &owner_root);
if (ret < 0)
goto out_unlock;
@@ -5340,7 +5596,7 @@ skip:
find_next_key(path, level, &wc->drop_progress);
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
- fs_info->nodesize, parent);
+ fs_info->nodesize, parent, owner_root);
btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
0, false);
ret = btrfs_free_extent(trans, &ref);
@@ -5407,7 +5663,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_extent_info(trans, fs_info,
eb->start, level, 1,
&wc->refs[level],
- &wc->flags[level]);
+ &wc->flags[level],
+ NULL);
if (ret < 0) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
@@ -5652,7 +5909,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
ret = btrfs_lookup_extent_info(trans, fs_info,
path->nodes[level]->start,
level, 1, &wc->refs[level],
- &wc->flags[level]);
+ &wc->flags[level], NULL);
if (ret < 0) {
err = ret;
goto out_end_trans;