aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/block-group.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r--fs/btrfs/block-group.c458
1 files changed, 256 insertions, 202 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c0f1d6818df7..5064be59dac5 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -15,6 +15,7 @@
#include "delalloc-space.h"
#include "discard.h"
#include "raid56.h"
+#include "zoned.h"
/*
* Return target flags in extended format or 0 if restripe for this chunk_type
@@ -424,6 +425,23 @@ int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
return ret;
}
+static bool space_cache_v1_done(struct btrfs_block_group *cache)
+{
+ bool ret;
+
+ spin_lock(&cache->lock);
+ ret = cache->cached != BTRFS_CACHE_FAST;
+ spin_unlock(&cache->lock);
+
+ return ret;
+}
+
+void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
+ struct btrfs_caching_control *caching_ctl)
+{
+ wait_event(caching_ctl->wait, space_cache_v1_done(cache));
+}
+
#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group *block_group)
{
@@ -639,11 +657,36 @@ static noinline void caching_thread(struct btrfs_work *work)
mutex_lock(&caching_ctl->mutex);
down_read(&fs_info->commit_root_sem);
- if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
+ ret = load_free_space_cache(block_group);
+ if (ret == 1) {
+ ret = 0;
+ goto done;
+ }
+
+ /*
+ * We failed to load the space cache, set ourselves to
+ * CACHE_STARTED and carry on.
+ */
+ spin_lock(&block_group->lock);
+ block_group->cached = BTRFS_CACHE_STARTED;
+ spin_unlock(&block_group->lock);
+ wake_up(&caching_ctl->wait);
+ }
+
+ /*
+ * If we are in the transaction that populated the free space tree we
+ * can't actually cache from the free space tree as our commit root and
+ * real root are the same, so we could change the contents of the blocks
+ * while caching. Instead do the slow caching in this case, and after
+ * the transaction has committed we will be safe.
+ */
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
ret = load_free_space_tree(caching_ctl);
else
ret = load_extent_tree_free(caching_ctl);
-
+done:
spin_lock(&block_group->lock);
block_group->caching_ctl = NULL;
block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
@@ -679,9 +722,13 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
{
DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
- struct btrfs_caching_control *caching_ctl;
+ struct btrfs_caching_control *caching_ctl = NULL;
int ret = 0;
+ /* Allocator for zoned filesystems does not use the cache at all */
+ if (btrfs_is_zoned(fs_info))
+ return 0;
+
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
if (!caching_ctl)
return -ENOMEM;
@@ -691,119 +738,41 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->start;
- refcount_set(&caching_ctl->count, 1);
+ refcount_set(&caching_ctl->count, 2);
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
spin_lock(&cache->lock);
- /*
- * This should be a rare occasion, but this could happen I think in the
- * case where one thread starts to load the space cache info, and then
- * some other thread starts a transaction commit which tries to do an
- * allocation while the other thread is still loading the space cache
- * info. The previous loop should have kept us from choosing this block
- * group, but if we've moved to the state where we will wait on caching
- * block groups we need to first check if we're doing a fast load here,
- * so we can wait for it to finish, otherwise we could end up allocating
- * from a block group who's cache gets evicted for one reason or
- * another.
- */
- while (cache->cached == BTRFS_CACHE_FAST) {
- struct btrfs_caching_control *ctl;
-
- ctl = cache->caching_ctl;
- refcount_inc(&ctl->count);
- prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&cache->lock);
-
- schedule();
-
- finish_wait(&ctl->wait, &wait);
- btrfs_put_caching_control(ctl);
- spin_lock(&cache->lock);
- }
-
if (cache->cached != BTRFS_CACHE_NO) {
- spin_unlock(&cache->lock);
kfree(caching_ctl);
- return 0;
+
+ caching_ctl = cache->caching_ctl;
+ if (caching_ctl)
+ refcount_inc(&caching_ctl->count);
+ spin_unlock(&cache->lock);
+ goto out;
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
- cache->cached = BTRFS_CACHE_FAST;
+ if (btrfs_test_opt(fs_info, SPACE_CACHE))
+ cache->cached = BTRFS_CACHE_FAST;
+ else
+ cache->cached = BTRFS_CACHE_STARTED;
+ cache->has_caching_ctl = 1;
spin_unlock(&cache->lock);
- if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
- mutex_lock(&caching_ctl->mutex);
- ret = load_free_space_cache(cache);
-
- spin_lock(&cache->lock);
- if (ret == 1) {
- cache->caching_ctl = NULL;
- cache->cached = BTRFS_CACHE_FINISHED;
- cache->last_byte_to_unpin = (u64)-1;
- caching_ctl->progress = (u64)-1;
- } else {
- if (load_cache_only) {
- cache->caching_ctl = NULL;
- cache->cached = BTRFS_CACHE_NO;
- } else {
- cache->cached = BTRFS_CACHE_STARTED;
- cache->has_caching_ctl = 1;
- }
- }
- spin_unlock(&cache->lock);
-#ifdef CONFIG_BTRFS_DEBUG
- if (ret == 1 &&
- btrfs_should_fragment_free_space(cache)) {
- u64 bytes_used;
-
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- bytes_used = cache->length - cache->used;
- cache->space_info->bytes_used += bytes_used >> 1;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- fragment_free_space(cache);
- }
-#endif
- mutex_unlock(&caching_ctl->mutex);
-
- wake_up(&caching_ctl->wait);
- if (ret == 1) {
- btrfs_put_caching_control(caching_ctl);
- btrfs_free_excluded_extents(cache);
- return 0;
- }
- } else {
- /*
- * We're either using the free space tree or no caching at all.
- * Set cached to the appropriate value and wakeup any waiters.
- */
- spin_lock(&cache->lock);
- if (load_cache_only) {
- cache->caching_ctl = NULL;
- cache->cached = BTRFS_CACHE_NO;
- } else {
- cache->cached = BTRFS_CACHE_STARTED;
- cache->has_caching_ctl = 1;
- }
- spin_unlock(&cache->lock);
- wake_up(&caching_ctl->wait);
- }
-
- if (load_cache_only) {
- btrfs_put_caching_control(caching_ctl);
- return 0;
- }
-
- down_write(&fs_info->commit_root_sem);
+ spin_lock(&fs_info->block_group_cache_lock);
refcount_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
- up_write(&fs_info->commit_root_sem);
+ spin_unlock(&fs_info->block_group_cache_lock);
btrfs_get_block_group(cache);
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
+out:
+ if (load_cache_only && caching_ctl)
+ btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
+ if (caching_ctl)
+ btrfs_put_caching_control(caching_ctl);
return ret;
}
@@ -892,8 +861,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
- struct btrfs_root *tree_root = fs_info->tree_root;
- struct btrfs_key key;
struct inode *inode;
struct kobject *kobj = NULL;
int ret;
@@ -934,6 +901,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
btrfs_return_cluster_to_free_space(block_group, cluster);
spin_unlock(&cluster->refill_lock);
+ btrfs_clear_treelog_bg(block_group);
+
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
@@ -971,42 +940,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&trans->transaction->dirty_bgs_lock);
mutex_unlock(&trans->transaction->cache_write_mutex);
- if (!IS_ERR(inode)) {
- ret = btrfs_orphan_add(trans, BTRFS_I(inode));
- if (ret) {
- btrfs_add_delayed_iput(inode);
- goto out;
- }
- clear_nlink(inode);
- /* One for the block groups ref */
- spin_lock(&block_group->lock);
- if (block_group->iref) {
- block_group->iref = 0;
- block_group->inode = NULL;
- spin_unlock(&block_group->lock);
- iput(inode);
- } else {
- spin_unlock(&block_group->lock);
- }
- /* One for our lookup ref */
- btrfs_add_delayed_iput(inode);
- }
-
- key.objectid = BTRFS_FREE_SPACE_OBJECTID;
- key.type = 0;
- key.offset = block_group->start;
-
- ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- if (ret < 0)
+ ret = btrfs_remove_free_space_inode(trans, inode, block_group);
+ if (ret)
goto out;
- if (ret > 0)
- btrfs_release_path(path);
- if (ret == 0) {
- ret = btrfs_del_item(trans, tree_root, path);
- if (ret)
- goto out;
- btrfs_release_path(path);
- }
spin_lock(&fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
@@ -1043,7 +979,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (block_group->cached == BTRFS_CACHE_STARTED)
btrfs_wait_block_group_cache_done(block_group);
if (block_group->has_caching_ctl) {
- down_write(&fs_info->commit_root_sem);
+ spin_lock(&fs_info->block_group_cache_lock);
if (!caching_ctl) {
struct btrfs_caching_control *ctl;
@@ -1057,7 +993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
}
if (caching_ctl)
list_del_init(&caching_ctl->list);
- up_write(&fs_info->commit_root_sem);
+ spin_unlock(&fs_info->block_group_cache_lock);
if (caching_ctl) {
/* Once for the caching bgs list and once for us. */
btrfs_put_caching_control(caching_ctl);
@@ -1079,12 +1015,17 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
WARN_ON(block_group->space_info->total_bytes
< block_group->length);
WARN_ON(block_group->space_info->bytes_readonly
- < block_group->length);
+ < block_group->length - block_group->zone_unusable);
+ WARN_ON(block_group->space_info->bytes_zone_unusable
+ < block_group->zone_unusable);
WARN_ON(block_group->space_info->disk_total
< block_group->length * factor);
}
block_group->space_info->total_bytes -= block_group->length;
- block_group->space_info->bytes_readonly -= block_group->length;
+ block_group->space_info->bytes_readonly -=
+ (block_group->length - block_group->zone_unusable);
+ block_group->space_info->bytes_zone_unusable -=
+ block_group->zone_unusable;
block_group->space_info->disk_total -= block_group->length * factor;
spin_unlock(&block_group->space_info->lock);
@@ -1228,7 +1169,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
}
num_bytes = cache->length - cache->reserved - cache->pinned -
- cache->bytes_super - cache->used;
+ cache->bytes_super - cache->zone_unusable - cache->used;
/*
* Data never overcommits, even in mixed mode, so do just the straight
@@ -1259,6 +1200,12 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
if (!ret) {
sinfo->bytes_readonly += num_bytes;
+ if (btrfs_is_zoned(cache->fs_info)) {
+ /* Migrate zone_unusable bytes to readonly */
+ sinfo->bytes_readonly += cache->zone_unusable;
+ sinfo->bytes_zone_unusable -= cache->zone_unusable;
+ cache->zone_unusable = 0;
+ }
cache->ro++;
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
}
@@ -1333,6 +1280,13 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
+ /*
+ * Long running balances can keep us blocked here for eternity, so
+ * simply skip deletion if we're unable to get the mutex.
+ */
+ if (!mutex_trylock(&fs_info->delete_unused_bgs_mutex))
+ return;
+
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
int trimming;
@@ -1352,8 +1306,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
- mutex_lock(&fs_info->delete_unused_bgs_mutex);
-
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
@@ -1442,9 +1394,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
btrfs_space_info_update_bytes_pinned(fs_info, space_info,
-block_group->pinned);
space_info->bytes_readonly += block_group->pinned;
- percpu_counter_add_batch(&space_info->total_bytes_pinned,
- -block_group->pinned,
- BTRFS_TOTAL_BYTES_PINNED_BATCH);
+ __btrfs_mod_total_bytes_pinned(space_info, -block_group->pinned);
block_group->pinned = 0;
spin_unlock(&block_group->lock);
@@ -1460,8 +1410,12 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
goto flip_async;
- /* DISCARD can flip during remount */
- trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
+ /*
+ * DISCARD can flip during remount. On zoned filesystems, we
+ * need to reset sequential-required zones.
+ */
+ trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
+ btrfs_is_zoned(fs_info);
/* Implicit trim during transaction commit. */
if (trimming)
@@ -1499,11 +1453,11 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
end_trans:
btrfs_end_transaction(trans);
next:
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock);
}
spin_unlock(&fs_info->unused_bgs_lock);
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
return;
flip_async:
@@ -1632,8 +1586,11 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
/**
- * btrfs_rmap_block - Map a physical disk address to a list of logical addresses
+ * Map a physical disk address to a list of logical addresses
+ *
+ * @fs_info: the filesystem
* @chunk_start: logical address of block group
+ * @bdev: physical device to resolve, can be NULL to indicate any device
* @physical: physical address to map to logical addresses
* @logical: return array of logical addresses which map to @physical
* @naddrs: length of @logical
@@ -1643,9 +1600,9 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
* Used primarily to exclude those portions of a block group that contain super
* block copies.
*/
-EXPORT_FOR_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
- u64 physical, u64 **logical, int *naddrs, int *stripe_len)
+ struct block_device *bdev, u64 physical, u64 **logical,
+ int *naddrs, int *stripe_len)
{
struct extent_map *em;
struct map_lookup *map;
@@ -1663,6 +1620,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
map = em->map_lookup;
data_stripe_length = em->orig_block_len;
io_stripe_size = map->stripe_len;
+ chunk_start = em->start;
/* For RAID5/6 adjust to a full IO stripe length */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
@@ -1677,14 +1635,18 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
for (i = 0; i < map->num_stripes; i++) {
bool already_inserted = false;
u64 stripe_nr;
+ u64 offset;
int j;
if (!in_range(physical, map->stripes[i].physical,
data_stripe_length))
continue;
+ if (bdev && map->stripes[i].dev->bdev != bdev)
+ continue;
+
stripe_nr = physical - map->stripes[i].physical;
- stripe_nr = div64_u64(stripe_nr, map->stripe_len);
+ stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);
if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
stripe_nr = stripe_nr * map->num_stripes + i;
@@ -1698,7 +1660,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
* instead of map->stripe_len
*/
- bytenr = chunk_start + stripe_nr * io_stripe_size;
+ bytenr = chunk_start + stripe_nr * io_stripe_size + offset;
/* Ensure we don't add duplicate addresses */
for (j = 0; j < nr; j++) {
@@ -1723,6 +1685,7 @@ out:
static int exclude_super_stripes(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
+ const bool zoned = btrfs_is_zoned(fs_info);
u64 bytenr;
u64 *logical;
int stripe_len;
@@ -1739,11 +1702,19 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
- ret = btrfs_rmap_block(fs_info, cache->start,
+ ret = btrfs_rmap_block(fs_info, cache->start, NULL,
bytenr, &logical, &nr, &stripe_len);
if (ret)
return ret;
+ /* Shouldn't have super stripes in sequential zones */
+ if (zoned && nr) {
+ btrfs_err(fs_info,
+ "zoned: block group %llu must not contain super block",
+ cache->start);
+ return -EUCLEAN;
+ }
+
while (nr--) {
u64 len = min_t(u64, stripe_len,
cache->start + cache->length - logical[nr]);
@@ -1805,7 +1776,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->discard_list);
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
- btrfs_init_free_space_ctl(cache);
+ btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@@ -1867,24 +1838,8 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
return ret;
}
-static void read_block_group_item(struct btrfs_block_group *cache,
- struct btrfs_path *path,
- const struct btrfs_key *key)
-{
- struct extent_buffer *leaf = path->nodes[0];
- struct btrfs_block_group_item bgi;
- int slot = path->slots[0];
-
- cache->length = key->offset;
-
- read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
- sizeof(bgi));
- cache->used = btrfs_stack_block_group_used(&bgi);
- cache->flags = btrfs_stack_block_group_flags(&bgi);
-}
-
static int read_one_block_group(struct btrfs_fs_info *info,
- struct btrfs_path *path,
+ struct btrfs_block_group_item *bgi,
const struct btrfs_key *key,
int need_clear)
{
@@ -1899,7 +1854,9 @@ static int read_one_block_group(struct btrfs_fs_info *info,
if (!cache)
return -ENOMEM;
- read_block_group_item(cache, path, key);
+ cache->length = key->offset;
+ cache->used = btrfs_stack_block_group_used(bgi);
+ cache->flags = btrfs_stack_block_group_flags(bgi);
set_free_space_tree_thresholds(cache);
@@ -1926,6 +1883,13 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
+ ret = btrfs_load_block_group_zone_info(cache, false);
+ if (ret) {
+ btrfs_err(info, "zoned: failed to load zone info of bg %llu",
+ cache->start);
+ goto error;
+ }
+
/*
* We need to exclude the super stripes now so that the space info has
* super bytes accounted for, otherwise we'll think we have more space
@@ -1939,12 +1903,20 @@ static int read_one_block_group(struct btrfs_fs_info *info,
}
/*
- * Check for two cases, either we are full, and therefore don't need
- * to bother with the caching work since we won't find any space, or we
- * are empty, and we can just add all the space in and be done with it.
- * This saves us _a_lot_ of time, particularly in the full case.
+ * For zoned filesystem, space after the allocation offset is the only
+ * free space for a block group. So, we don't need any caching work.
+ * btrfs_calc_zone_unusable() will set the amount of free space and
+ * zone_unusable space.
+ *
+ * For regular filesystem, check for two cases, either we are full, and
+ * therefore don't need to bother with the caching work since we won't
+ * find any space, or we are empty, and we can just add all the space
+ * in and be done with it. This saves us _a_lot_ of time, particularly
+ * in the full case.
*/
- if (cache->length == cache->used) {
+ if (btrfs_is_zoned(info)) {
+ btrfs_calc_zone_unusable(cache);
+ } else if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
btrfs_free_excluded_extents(cache);
@@ -1963,7 +1935,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
}
trace_btrfs_add_block_group(info, cache, 0);
btrfs_update_space_info(info, cache->flags, cache->length,
- cache->used, cache->bytes_super, &space_info);
+ cache->used, cache->bytes_super,
+ cache->zone_unusable, &space_info);
cache->space_info = space_info;
@@ -1985,6 +1958,51 @@ error:
return ret;
}
+static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
+{
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+ struct btrfs_space_info *space_info;
+ struct rb_node *node;
+ int ret = 0;
+
+ for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
+ struct extent_map *em;
+ struct map_lookup *map;
+ struct btrfs_block_group *bg;
+
+ em = rb_entry(node, struct extent_map, rb_node);
+ map = em->map_lookup;
+ bg = btrfs_create_block_group_cache(fs_info, em->start);
+ if (!bg) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ /* Fill dummy cache as FULL */
+ bg->length = em->len;
+ bg->flags = map->type;
+ bg->last_byte_to_unpin = (u64)-1;
+ bg->cached = BTRFS_CACHE_FINISHED;
+ bg->used = em->len;
+ bg->flags = map->type;
+ ret = btrfs_add_block_group_cache(fs_info, bg);
+ if (ret) {
+ btrfs_remove_free_space_cache(bg);
+ btrfs_put_block_group(bg);
+ break;
+ }
+ btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
+ 0, 0, &space_info);
+ bg->space_info = space_info;
+ link_block_group(bg);
+
+ set_avail_alloc_bits(fs_info, bg->flags);
+ }
+ if (!ret)
+ btrfs_init_global_block_rsv(fs_info);
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_path *path;
@@ -1995,6 +2013,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
int need_clear = 0;
u64 cache_gen;
+ if (!info->extent_root)
+ return fill_dummy_bgs(info);
+
key.objectid = 0;
key.offset = 0;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2010,20 +2031,31 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
need_clear = 1;
while (1) {
+ struct btrfs_block_group_item bgi;
+ struct extent_buffer *leaf;
+ int slot;
+
ret = find_first_block_group(info, path, &key);
if (ret > 0)
break;
if (ret != 0)
goto error;
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- ret = read_one_block_group(info, path, &key, need_clear);
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ btrfs_release_path(path);
+ ret = read_one_block_group(info, &bgi, &key, need_clear);
if (ret < 0)
goto error;
key.objectid += key.offset;
key.offset = 0;
- btrfs_release_path(path);
}
+ btrfs_release_path(path);
list_for_each_entry(space_info, &info->space_info, list) {
int i;
@@ -2151,7 +2183,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- cache->needs_free_space = 1;
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ cache->needs_free_space = 1;
+
+ ret = btrfs_load_block_group_zone_info(cache, true);
+ if (ret) {
+ btrfs_put_block_group(cache);
+ return ret;
+ }
+
ret = exclude_super_stripes(cache);
if (ret) {
/* We may have excluded something, so call this just in case */
@@ -2193,7 +2233,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
- cache->bytes_super, &cache->space_info);
+ cache->bytes_super, 0, &cache->space_info);
btrfs_update_global_block_rsv(fs_info);
link_block_group(cache);
@@ -2301,8 +2341,15 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
spin_lock(&cache->lock);
if (!--cache->ro) {
num_bytes = cache->length - cache->reserved -
- cache->pinned - cache->bytes_super - cache->used;
+ cache->pinned - cache->bytes_super -
+ cache->zone_unusable - cache->used;
sinfo->bytes_readonly -= num_bytes;
+ if (btrfs_is_zoned(cache->fs_info)) {
+ /* Migrate zone_unusable bytes back */
+ cache->zone_unusable = cache->alloc_offset - cache->used;
+ sinfo->bytes_zone_unusable += cache->zone_unusable;
+ sinfo->bytes_readonly -= cache->zone_unusable;
+ }
list_del_init(&cache->ro_list);
}
spin_unlock(&cache->lock);
@@ -2360,6 +2407,9 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
int retries = 0;
int ret = 0;
+ if (!btrfs_test_opt(fs_info, SPACE_CACHE))
+ return 0;
+
/*
* If this block group is smaller than 100 megs don't bother caching the
* block group.
@@ -2400,7 +2450,7 @@ again:
* time.
*/
BTRFS_I(inode)->generation = 0;
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret) {
/*
* So theoretically we could recover from this, simply set the
@@ -2573,8 +2623,10 @@ again:
if (!path) {
path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
}
/*
@@ -2668,16 +2720,14 @@ again:
btrfs_put_block_group(cache);
if (drop_reserve)
btrfs_delayed_refs_rsv_release(fs_info, 1);
-
- if (ret)
- break;
-
/*
* Avoid blocking other tasks for too long. It might even save
* us from writing caches for block groups that are going to be
* removed.
*/
mutex_unlock(&trans->transaction->cache_write_mutex);
+ if (ret)
+ goto out;
mutex_lock(&trans->transaction->cache_write_mutex);
}
mutex_unlock(&trans->transaction->cache_write_mutex);
@@ -2686,7 +2736,8 @@ again:
* Go through delayed refs for all the stuff we've just kicked off
* and then loop back (just once)
*/
- ret = btrfs_run_delayed_refs(trans, 0);
+ if (!ret)
+ ret = btrfs_run_delayed_refs(trans, 0);
if (!ret && loops == 0) {
loops++;
spin_lock(&cur_trans->dirty_bgs_lock);
@@ -2700,7 +2751,12 @@ again:
goto again;
}
spin_unlock(&cur_trans->dirty_bgs_lock);
- } else if (ret < 0) {
+ }
+out:
+ if (ret < 0) {
+ spin_lock(&cur_trans->dirty_bgs_lock);
+ list_splice_init(&dirty, &cur_trans->dirty_bgs);
+ spin_unlock(&cur_trans->dirty_bgs_lock);
btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
}
@@ -2904,10 +2960,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
- percpu_counter_add_batch(
- &cache->space_info->total_bytes_pinned,
- num_bytes,
- BTRFS_TOTAL_BYTES_PINNED_BATCH);
+ __btrfs_mod_total_bytes_pinned(cache->space_info,
+ num_bytes);
set_extent_dirty(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
@@ -3306,14 +3360,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
- down_write(&info->commit_root_sem);
+ spin_lock(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) {
caching_ctl = list_entry(info->caching_block_groups.next,
struct btrfs_caching_control, list);
list_del(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
}
- up_write(&info->commit_root_sem);
+ spin_unlock(&info->block_group_cache_lock);
spin_lock(&info->unused_bgs_lock);
while (!list_empty(&info->unused_bgs)) {