Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r--	fs/btrfs/block-group.c	458
1 file changed, 256 insertions, 202 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c0f1d6818df7..5064be59dac5 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -15,6 +15,7 @@
 #include "delalloc-space.h"
 #include "discard.h"
 #include "raid56.h"
+#include "zoned.h"
 
 /*
  * Return target flags in extended format or 0 if restripe for this chunk_type
@@ -424,6 +425,23 @@ int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
 	return ret;
 }
 
+static bool space_cache_v1_done(struct btrfs_block_group *cache)
+{
+	bool ret;
+
+	spin_lock(&cache->lock);
+	ret = cache->cached != BTRFS_CACHE_FAST;
+	spin_unlock(&cache->lock);
+
+	return ret;
+}
+
+void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
+				struct btrfs_caching_control *caching_ctl)
+{
+	wait_event(caching_ctl->wait, space_cache_v1_done(cache));
+}
+
 #ifdef CONFIG_BTRFS_DEBUG
 static void fragment_free_space(struct btrfs_block_group *block_group)
 {
@@ -639,11 +657,36 @@ static noinline void caching_thread(struct btrfs_work *work)
 	mutex_lock(&caching_ctl->mutex);
 	down_read(&fs_info->commit_root_sem);
 
-	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
+		ret = load_free_space_cache(block_group);
+		if (ret == 1) {
+			ret = 0;
+			goto done;
+		}
+
+		/*
+		 * We failed to load the space cache, set ourselves to
+		 * CACHE_STARTED and carry on.
+		 */
+		spin_lock(&block_group->lock);
+		block_group->cached = BTRFS_CACHE_STARTED;
+		spin_unlock(&block_group->lock);
+		wake_up(&caching_ctl->wait);
+	}
+
+	/*
+	 * If we are in the transaction that populated the free space tree we
+	 * can't actually cache from the free space tree as our commit root and
+	 * real root are the same, so we could change the contents of the blocks
+	 * while caching. Instead do the slow caching in this case, and after
+	 * the transaction has committed we will be safe.
+	 */
+	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+	    !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
 		ret = load_free_space_tree(caching_ctl);
 	else
 		ret = load_extent_tree_free(caching_ctl);
-
+done:
 	spin_lock(&block_group->lock);
 	block_group->caching_ctl = NULL;
 	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
@@ -679,9 +722,13 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
 {
 	DEFINE_WAIT(wait);
 	struct btrfs_fs_info *fs_info = cache->fs_info;
-	struct btrfs_caching_control *caching_ctl;
+	struct btrfs_caching_control *caching_ctl = NULL;
 	int ret = 0;
 
+	/* Allocator for zoned filesystems does not use the cache at all */
+	if (btrfs_is_zoned(fs_info))
+		return 0;
+
 	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
 	if (!caching_ctl)
 		return -ENOMEM;
@@ -691,119 +738,41 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
 	init_waitqueue_head(&caching_ctl->wait);
 	caching_ctl->block_group = cache;
 	caching_ctl->progress = cache->start;
-	refcount_set(&caching_ctl->count, 1);
+	refcount_set(&caching_ctl->count, 2);
 	btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
 
 	spin_lock(&cache->lock);
-	/*
-	 * This should be a rare occasion, but this could happen I think in the
-	 * case where one thread starts to load the space cache info, and then
-	 * some other thread starts a transaction commit which tries to do an
-	 * allocation while the other thread is still loading the space cache
-	 * info. The previous loop should have kept us from choosing this block
-	 * group, but if we've moved to the state where we will wait on caching
-	 * block groups we need to first check if we're doing a fast load here,
-	 * so we can wait for it to finish, otherwise we could end up allocating
-	 * from a block group who's cache gets evicted for one reason or
-	 * another.
-	 */
-	while (cache->cached == BTRFS_CACHE_FAST) {
-		struct btrfs_caching_control *ctl;
-
-		ctl = cache->caching_ctl;
-		refcount_inc(&ctl->count);
-		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock(&cache->lock);
-
-		schedule();
-
-		finish_wait(&ctl->wait, &wait);
-		btrfs_put_caching_control(ctl);
-		spin_lock(&cache->lock);
-	}
-
 	if (cache->cached != BTRFS_CACHE_NO) {
-		spin_unlock(&cache->lock);
 		kfree(caching_ctl);
-		return 0;
+
+		caching_ctl = cache->caching_ctl;
+		if (caching_ctl)
+			refcount_inc(&caching_ctl->count);
+		spin_unlock(&cache->lock);
+		goto out;
 	}
 	WARN_ON(cache->caching_ctl);
 	cache->caching_ctl = caching_ctl;
-	cache->cached = BTRFS_CACHE_FAST;
+	if (btrfs_test_opt(fs_info, SPACE_CACHE))
+		cache->cached = BTRFS_CACHE_FAST;
+	else
+		cache->cached = BTRFS_CACHE_STARTED;
+	cache->has_caching_ctl = 1;
 	spin_unlock(&cache->lock);
 
-	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
-		mutex_lock(&caching_ctl->mutex);
-		ret = load_free_space_cache(cache);
-
-		spin_lock(&cache->lock);
-		if (ret == 1) {
-			cache->caching_ctl = NULL;
-			cache->cached = BTRFS_CACHE_FINISHED;
-			cache->last_byte_to_unpin = (u64)-1;
-			caching_ctl->progress = (u64)-1;
-		} else {
-			if (load_cache_only) {
-				cache->caching_ctl = NULL;
-				cache->cached = BTRFS_CACHE_NO;
-			} else {
-				cache->cached = BTRFS_CACHE_STARTED;
-				cache->has_caching_ctl = 1;
-			}
-		}
-		spin_unlock(&cache->lock);
-#ifdef CONFIG_BTRFS_DEBUG
-		if (ret == 1 &&
-		    btrfs_should_fragment_free_space(cache)) {
-			u64 bytes_used;
-
-			spin_lock(&cache->space_info->lock);
-			spin_lock(&cache->lock);
-			bytes_used = cache->length - cache->used;
-			cache->space_info->bytes_used += bytes_used >> 1;
-			spin_unlock(&cache->lock);
-			spin_unlock(&cache->space_info->lock);
-			fragment_free_space(cache);
-		}
-#endif
-		mutex_unlock(&caching_ctl->mutex);
-
-		wake_up(&caching_ctl->wait);
-		if (ret == 1) {
-			btrfs_put_caching_control(caching_ctl);
-			btrfs_free_excluded_extents(cache);
-			return 0;
-		}
-	} else {
-		/*
-		 * We're either using the free space tree or no caching at all.
-		 * Set cached to the appropriate value and wakeup any waiters.
-		 */
-		spin_lock(&cache->lock);
-		if (load_cache_only) {
-			cache->caching_ctl = NULL;
-			cache->cached = BTRFS_CACHE_NO;
-		} else {
-			cache->cached = BTRFS_CACHE_STARTED;
-			cache->has_caching_ctl = 1;
-		}
-		spin_unlock(&cache->lock);
-		wake_up(&caching_ctl->wait);
-	}
-
-	if (load_cache_only) {
-		btrfs_put_caching_control(caching_ctl);
-		return 0;
-	}
-
-	down_write(&fs_info->commit_root_sem);
+	spin_lock(&fs_info->block_group_cache_lock);
 	refcount_inc(&caching_ctl->count);
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
-	up_write(&fs_info->commit_root_sem);
+	spin_unlock(&fs_info->block_group_cache_lock);
 
 	btrfs_get_block_group(cache);
 
 	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
+out:
+	if (load_cache_only && caching_ctl)
+		btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
+	if (caching_ctl)
+		btrfs_put_caching_control(caching_ctl);
 
 	return ret;
 }
@@ -892,8 +861,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_block_group *block_group;
 	struct btrfs_free_cluster *cluster;
-	struct btrfs_root *tree_root = fs_info->tree_root;
-	struct btrfs_key key;
 	struct inode *inode;
 	struct kobject *kobj = NULL;
 	int ret;
@@ -934,6 +901,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	btrfs_return_cluster_to_free_space(block_group, cluster);
 	spin_unlock(&cluster->refill_lock);
 
+	btrfs_clear_treelog_bg(block_group);
+
 	path = btrfs_alloc_path();
 	if (!path) {
 		ret = -ENOMEM;
@@ -971,42 +940,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_unlock(&trans->transaction->dirty_bgs_lock);
 	mutex_unlock(&trans->transaction->cache_write_mutex);
 
-	if (!IS_ERR(inode)) {
-		ret = btrfs_orphan_add(trans, BTRFS_I(inode));
-		if (ret) {
-			btrfs_add_delayed_iput(inode);
-			goto out;
-		}
-		clear_nlink(inode);
-		/* One for the block groups ref */
-		spin_lock(&block_group->lock);
-		if (block_group->iref) {
-			block_group->iref = 0;
-			block_group->inode = NULL;
-			spin_unlock(&block_group->lock);
-			iput(inode);
-		} else {
-			spin_unlock(&block_group->lock);
-		}
-		/* One for our lookup ref */
-		btrfs_add_delayed_iput(inode);
-	}
-
-	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
-	key.type = 0;
-	key.offset = block_group->start;
-
-	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
-	if (ret < 0)
+	ret = btrfs_remove_free_space_inode(trans, inode, block_group);
+	if (ret)
 		goto out;
-	if (ret > 0)
-		btrfs_release_path(path);
-	if (ret == 0) {
-		ret = btrfs_del_item(trans, tree_root, path);
-		if (ret)
-			goto out;
-		btrfs_release_path(path);
-	}
 
 	spin_lock(&fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
@@ -1043,7 +979,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	if (block_group->cached == BTRFS_CACHE_STARTED)
 		btrfs_wait_block_group_cache_done(block_group);
 	if (block_group->has_caching_ctl) {
-		down_write(&fs_info->commit_root_sem);
+		spin_lock(&fs_info->block_group_cache_lock);
 		if (!caching_ctl) {
 			struct btrfs_caching_control *ctl;
 
@@ -1057,7 +993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		}
 		if (caching_ctl)
 			list_del_init(&caching_ctl->list);
-		up_write(&fs_info->commit_root_sem);
+		spin_unlock(&fs_info->block_group_cache_lock);
 		if (caching_ctl) {
 			/* Once for the caching bgs list and once for us. */
 			btrfs_put_caching_control(caching_ctl);
@@ -1079,12 +1015,17 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		WARN_ON(block_group->space_info->total_bytes
 			< block_group->length);
 		WARN_ON(block_group->space_info->bytes_readonly
-			< block_group->length);
+			< block_group->length - block_group->zone_unusable);
+		WARN_ON(block_group->space_info->bytes_zone_unusable
+			< block_group->zone_unusable);
 		WARN_ON(block_group->space_info->disk_total
 			< block_group->length * factor);
 	}
 	block_group->space_info->total_bytes -= block_group->length;
-	block_group->space_info->bytes_readonly -= block_group->length;
+	block_group->space_info->bytes_readonly -=
+			(block_group->length - block_group->zone_unusable);
+	block_group->space_info->bytes_zone_unusable -=
+			block_group->zone_unusable;
 	block_group->space_info->disk_total -= block_group->length * factor;
 
 	spin_unlock(&block_group->space_info->lock);
@@ -1228,7 +1169,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 	}
 
 	num_bytes = cache->length - cache->reserved - cache->pinned -
-		    cache->bytes_super - cache->used;
+		    cache->bytes_super - cache->zone_unusable - cache->used;
 
 	/*
 	 * Data never overcommits, even in mixed mode, so do just the straight
@@ -1259,6 +1200,12 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 	if (!ret) {
 		sinfo->bytes_readonly += num_bytes;
+		if (btrfs_is_zoned(cache->fs_info)) {
+			/* Migrate zone_unusable bytes to readonly */
+			sinfo->bytes_readonly += cache->zone_unusable;
+			sinfo->bytes_zone_unusable -= cache->zone_unusable;
+			cache->zone_unusable = 0;
+		}
 		cache->ro++;
 		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
 	}
@@ -1333,6 +1280,13 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
 		return;
 
+	/*
+	 * Long running balances can keep us blocked here for eternity, so
+	 * simply skip deletion if we're unable to get the mutex.
+	 */
+	if (!mutex_trylock(&fs_info->delete_unused_bgs_mutex))
+		return;
+
 	spin_lock(&fs_info->unused_bgs_lock);
 	while (!list_empty(&fs_info->unused_bgs)) {
 		int trimming;
@@ -1352,8 +1306,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		btrfs_discard_cancel_work(&fs_info->discard_ctl,
 					  block_group);
 
-		mutex_lock(&fs_info->delete_unused_bgs_mutex);
-
 		/* Don't want to race with allocators so take the groups_sem */
 		down_write(&space_info->groups_sem);
 
@@ -1442,9 +1394,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		btrfs_space_info_update_bytes_pinned(fs_info, space_info,
 						     -block_group->pinned);
 		space_info->bytes_readonly += block_group->pinned;
-		percpu_counter_add_batch(&space_info->total_bytes_pinned,
-					 -block_group->pinned,
-					 BTRFS_TOTAL_BYTES_PINNED_BATCH);
+		__btrfs_mod_total_bytes_pinned(space_info, -block_group->pinned);
 		block_group->pinned = 0;
 
 		spin_unlock(&block_group->lock);
@@ -1460,8 +1410,12 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
 			goto flip_async;
 
-		/* DISCARD can flip during remount */
-		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
+		/*
+		 * DISCARD can flip during remount. On zoned filesystems, we
+		 * need to reset sequential-required zones.
+		 */
+		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
+				btrfs_is_zoned(fs_info);
 
 		/* Implicit trim during transaction commit. */
 		if (trimming)
@@ -1499,11 +1453,11 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 end_trans:
 		btrfs_end_transaction(trans);
 next:
-		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 		btrfs_put_block_group(block_group);
 		spin_lock(&fs_info->unused_bgs_lock);
 	}
 	spin_unlock(&fs_info->unused_bgs_lock);
+	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 	return;
 
 flip_async:
@@ -1632,8 +1586,11 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 }
 
 /**
- * btrfs_rmap_block - Map a physical disk address to a list of logical addresses
+ * Map a physical disk address to a list of logical addresses
+ *
+ * @fs_info:       the filesystem
  * @chunk_start:   logical address of block group
+ * @bdev:          physical device to resolve, can be NULL to indicate any device
 * @physical:      physical address to map to logical addresses
 * @logical:       return array of logical addresses which map to @physical
 * @naddrs:        length of @logical
@@ -1643,9 +1600,9 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 * Used primarily to exclude those portions of a block group that contain super
 * block copies.
 */
-EXPORT_FOR_TESTS
 int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
-		     u64 physical, u64 **logical, int *naddrs, int *stripe_len)
+		     struct block_device *bdev, u64 physical, u64 **logical,
+		     int *naddrs, int *stripe_len)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
@@ -1663,6 +1620,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
 	map = em->map_lookup;
 	data_stripe_length = em->orig_block_len;
 	io_stripe_size = map->stripe_len;
+	chunk_start = em->start;
 
 	/* For RAID5/6 adjust to a full IO stripe length */
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
@@ -1677,14 +1635,18 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
 	for (i = 0; i < map->num_stripes; i++) {
 		bool already_inserted = false;
 		u64 stripe_nr;
+		u64 offset;
 		int j;
 
 		if (!in_range(physical, map->stripes[i].physical,
 			      data_stripe_length))
 			continue;
 
+		if (bdev && map->stripes[i].dev->bdev != bdev)
+			continue;
+
 		stripe_nr = physical - map->stripes[i].physical;
-		stripe_nr = div64_u64(stripe_nr, map->stripe_len);
+		stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);
 
 		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 			stripe_nr = stripe_nr * map->num_stripes + i;
@@ -1698,7 +1660,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
 		 * instead of map->stripe_len
 		 */
 
-		bytenr = chunk_start + stripe_nr * io_stripe_size;
+		bytenr = chunk_start + stripe_nr * io_stripe_size + offset;
 
 		/* Ensure we don't add duplicate addresses */
 		for (j = 0; j < nr; j++) {
@@ -1723,6 +1685,7 @@ out:
 static int exclude_super_stripes(struct btrfs_block_group *cache)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
+	const bool zoned = btrfs_is_zoned(fs_info);
 	u64 bytenr;
 	u64 *logical;
 	int stripe_len;
@@ -1739,11 +1702,19 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 
 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
 		bytenr = btrfs_sb_offset(i);
-		ret = btrfs_rmap_block(fs_info, cache->start,
+		ret = btrfs_rmap_block(fs_info, cache->start, NULL,
 				       bytenr, &logical, &nr, &stripe_len);
 		if (ret)
 			return ret;
 
+		/* Shouldn't have super stripes in sequential zones */
+		if (zoned && nr) {
+			btrfs_err(fs_info,
+			"zoned: block group %llu must not contain super block",
+				  cache->start);
+			return -EUCLEAN;
+		}
+
 		while (nr--) {
 			u64 len = min_t(u64, stripe_len,
 				cache->start + cache->length - logical[nr]);
@@ -1805,7 +1776,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 	INIT_LIST_HEAD(&cache->discard_list);
 	INIT_LIST_HEAD(&cache->dirty_list);
 	INIT_LIST_HEAD(&cache->io_list);
-	btrfs_init_free_space_ctl(cache);
+	btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
 	atomic_set(&cache->frozen, 0);
 	mutex_init(&cache->free_space_lock);
 	btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@@ -1867,24 +1838,8 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
 	return ret;
 }
 
-static void read_block_group_item(struct btrfs_block_group *cache,
-				 struct btrfs_path *path,
-				 const struct btrfs_key *key)
-{
-	struct extent_buffer *leaf = path->nodes[0];
-	struct btrfs_block_group_item bgi;
-	int slot = path->slots[0];
-
-	cache->length = key->offset;
-
-	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
-			   sizeof(bgi));
-	cache->used = btrfs_stack_block_group_used(&bgi);
-	cache->flags = btrfs_stack_block_group_flags(&bgi);
-}
-
 static int read_one_block_group(struct btrfs_fs_info *info,
-				struct btrfs_path *path,
+				struct btrfs_block_group_item *bgi,
 				const struct btrfs_key *key,
 				int need_clear)
 {
@@ -1899,7 +1854,9 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 	if (!cache)
 		return -ENOMEM;
 
-	read_block_group_item(cache, path, key);
+	cache->length = key->offset;
+	cache->used = btrfs_stack_block_group_used(bgi);
+	cache->flags = btrfs_stack_block_group_flags(bgi);
 
 	set_free_space_tree_thresholds(cache);
 
@@ -1926,6 +1883,13 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 		goto error;
 	}
 
+	ret = btrfs_load_block_group_zone_info(cache, false);
+	if (ret) {
+		btrfs_err(info, "zoned: failed to load zone info of bg %llu",
+			  cache->start);
+		goto error;
+	}
+
 	/*
 	 * We need to exclude the super stripes now so that the space info has
 	 * super bytes accounted for, otherwise we'll think we have more space
@@ -1939,12 +1903,20 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 	}
 
 	/*
-	 * Check for two cases, either we are full, and therefore don't need
-	 * to bother with the caching work since we won't find any space, or we
-	 * are empty, and we can just add all the space in and be done with it.
-	 * This saves us _a_lot_ of time, particularly in the full case.
+	 * For zoned filesystem, space after the allocation offset is the only
+	 * free space for a block group. So, we don't need any caching work.
+	 * btrfs_calc_zone_unusable() will set the amount of free space and
+	 * zone_unusable space.
+	 *
+	 * For regular filesystem, check for two cases, either we are full, and
+	 * therefore don't need to bother with the caching work since we won't
+	 * find any space, or we are empty, and we can just add all the space
+	 * in and be done with it. This saves us _a_lot_ of time, particularly
+	 * in the full case.
 	 */
-	if (cache->length == cache->used) {
+	if (btrfs_is_zoned(info)) {
+		btrfs_calc_zone_unusable(cache);
+	} else if (cache->length == cache->used) {
 		cache->last_byte_to_unpin = (u64)-1;
 		cache->cached = BTRFS_CACHE_FINISHED;
 		btrfs_free_excluded_extents(cache);
@@ -1963,7 +1935,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 	}
 	trace_btrfs_add_block_group(info, cache, 0);
 	btrfs_update_space_info(info, cache->flags, cache->length,
-				cache->used, cache->bytes_super, &space_info);
+				cache->used, cache->bytes_super,
+				cache->zone_unusable, &space_info);
 
 	cache->space_info = space_info;
 
@@ -1985,6 +1958,51 @@ error:
 	return ret;
 }
 
+static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
+{
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+	struct btrfs_space_info *space_info;
+	struct rb_node *node;
+	int ret = 0;
+
+	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
+		struct extent_map *em;
+		struct map_lookup *map;
+		struct btrfs_block_group *bg;
+
+		em = rb_entry(node, struct extent_map, rb_node);
+		map = em->map_lookup;
+		bg = btrfs_create_block_group_cache(fs_info, em->start);
+		if (!bg) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		/* Fill dummy cache as FULL */
+		bg->length = em->len;
+		bg->flags = map->type;
+		bg->last_byte_to_unpin = (u64)-1;
+		bg->cached = BTRFS_CACHE_FINISHED;
+		bg->used = em->len;
+		bg->flags = map->type;
+		ret = btrfs_add_block_group_cache(fs_info, bg);
+		if (ret) {
+			btrfs_remove_free_space_cache(bg);
+			btrfs_put_block_group(bg);
+			break;
+		}
+		btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
+					0, 0, &space_info);
+		bg->space_info = space_info;
+		link_block_group(bg);
+
+		set_avail_alloc_bits(fs_info, bg->flags);
+	}
+	if (!ret)
+		btrfs_init_global_block_rsv(fs_info);
+	return ret;
+}
+
 int btrfs_read_block_groups(struct btrfs_fs_info *info)
 {
 	struct btrfs_path *path;
@@ -1995,6 +2013,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 	int need_clear = 0;
 	u64 cache_gen;
 
+	if (!info->extent_root)
+		return fill_dummy_bgs(info);
+
 	key.objectid = 0;
 	key.offset = 0;
 	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2010,20 +2031,31 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 		need_clear = 1;
 
 	while (1) {
+		struct btrfs_block_group_item bgi;
+		struct extent_buffer *leaf;
+		int slot;
+
 		ret = find_first_block_group(info, path, &key);
 		if (ret > 0)
 			break;
 		if (ret != 0)
 			goto error;
 
-		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-		ret = read_one_block_group(info, path, &key, need_clear);
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+				   sizeof(bgi));
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		btrfs_release_path(path);
+		ret = read_one_block_group(info, &bgi, &key, need_clear);
 		if (ret < 0)
 			goto error;
 		key.objectid += key.offset;
 		key.offset = 0;
-		btrfs_release_path(path);
 	}
+	btrfs_release_path(path);
 
 	list_for_each_entry(space_info, &info->space_info, list) {
 		int i;
@@ -2151,7 +2183,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 	cache->flags = type;
 	cache->last_byte_to_unpin = (u64)-1;
 	cache->cached = BTRFS_CACHE_FINISHED;
-	cache->needs_free_space = 1;
+	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+		cache->needs_free_space = 1;
+
+	ret = btrfs_load_block_group_zone_info(cache, true);
+	if (ret) {
+		btrfs_put_block_group(cache);
+		return ret;
+	}
+
 	ret = exclude_super_stripes(cache);
 	if (ret) {
 		/* We may have excluded something, so call this just in case */
@@ -2193,7 +2233,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 	 */
 	trace_btrfs_add_block_group(fs_info, cache, 1);
 	btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
-				cache->bytes_super, &cache->space_info);
+				cache->bytes_super, 0, &cache->space_info);
 	btrfs_update_global_block_rsv(fs_info);
 
 	link_block_group(cache);
@@ -2301,8 +2341,15 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
 	spin_lock(&cache->lock);
 	if (!--cache->ro) {
 		num_bytes = cache->length - cache->reserved -
-			    cache->pinned - cache->bytes_super - cache->used;
+			    cache->pinned - cache->bytes_super -
+			    cache->zone_unusable - cache->used;
 		sinfo->bytes_readonly -= num_bytes;
+		if (btrfs_is_zoned(cache->fs_info)) {
+			/* Migrate zone_unusable bytes back */
+			cache->zone_unusable = cache->alloc_offset - cache->used;
+			sinfo->bytes_zone_unusable += cache->zone_unusable;
+			sinfo->bytes_readonly -= cache->zone_unusable;
+		}
 		list_del_init(&cache->ro_list);
 	}
 	spin_unlock(&cache->lock);
@@ -2360,6 +2407,9 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
 	int retries = 0;
 	int ret = 0;
 
+	if (!btrfs_test_opt(fs_info, SPACE_CACHE))
+		return 0;
+
 	/*
 	 * If this block group is smaller than 100 megs don't bother caching the
 	 * block group.
@@ -2400,7 +2450,7 @@ again:
 		 * time.
 		 */
 		BTRFS_I(inode)->generation = 0;
-		ret = btrfs_update_inode(trans, root, inode);
+		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 		if (ret) {
 			/*
 			 * So theoretically we could recover from this, simply set the
@@ -2573,8 +2623,10 @@ again:
 
 	if (!path) {
 		path = btrfs_alloc_path();
-		if (!path)
-			return -ENOMEM;
+		if (!path) {
+			ret = -ENOMEM;
+			goto out;
+		}
 	}
 
 	/*
@@ -2668,16 +2720,14 @@ again:
 		btrfs_put_block_group(cache);
 		if (drop_reserve)
 			btrfs_delayed_refs_rsv_release(fs_info, 1);
-
-		if (ret)
-			break;
-
 		/*
 		 * Avoid blocking other tasks for too long. It might even save
 		 * us from writing caches for block groups that are going to be
 		 * removed.
 		 */
 		mutex_unlock(&trans->transaction->cache_write_mutex);
+		if (ret)
+			goto out;
 		mutex_lock(&trans->transaction->cache_write_mutex);
 	}
 	mutex_unlock(&trans->transaction->cache_write_mutex);
@@ -2686,7 +2736,8 @@ again:
 	 * Go through delayed refs for all the stuff we've just kicked off
 	 * and then loop back (just once)
 	 */
-	ret = btrfs_run_delayed_refs(trans, 0);
+	if (!ret)
+		ret = btrfs_run_delayed_refs(trans, 0);
 	if (!ret && loops == 0) {
 		loops++;
 		spin_lock(&cur_trans->dirty_bgs_lock);
@@ -2700,7 +2751,12 @@ again:
 			goto again;
 		}
 		spin_unlock(&cur_trans->dirty_bgs_lock);
-	} else if (ret < 0) {
+	}
+out:
+	if (ret < 0) {
+		spin_lock(&cur_trans->dirty_bgs_lock);
+		list_splice_init(&dirty, &cur_trans->dirty_bgs);
+		spin_unlock(&cur_trans->dirty_bgs_lock);
 		btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
 	}
 
@@ -2904,10 +2960,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
 
-			percpu_counter_add_batch(
-					&cache->space_info->total_bytes_pinned,
-					num_bytes,
-					BTRFS_TOTAL_BYTES_PINNED_BATCH);
+			__btrfs_mod_total_bytes_pinned(cache->space_info,
+						       num_bytes);
 			set_extent_dirty(&trans->transaction->pinned_extents,
 					 bytenr, bytenr + num_bytes - 1,
 					 GFP_NOFS | __GFP_NOFAIL);
@@ -3306,14 +3360,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	struct btrfs_caching_control *caching_ctl;
 	struct rb_node *n;
 
-	down_write(&info->commit_root_sem);
+	spin_lock(&info->block_group_cache_lock);
 	while (!list_empty(&info->caching_block_groups)) {
 		caching_ctl = list_entry(info->caching_block_groups.next,
 					 struct btrfs_caching_control, list);
 		list_del(&caching_ctl->list);
 		btrfs_put_caching_control(caching_ctl);
 	}
-	up_write(&info->commit_root_sem);
+	spin_unlock(&info->block_group_cache_lock);
 
 	spin_lock(&info->unused_bgs_lock);
 	while (!list_empty(&info->unused_bgs)) {
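For readers following the btrfs_rmap_block() hunks above: switching from div64_u64() to div64_u64_rem() keeps the intra-stripe offset and adds it to the returned logical address, and the new bdev argument limits the mapping to one device. The snippet below is a minimal userspace sketch of that address arithmetic only; rmap_one_stripe() is a made-up helper and this is not the kernel implementation.

/*
 * Simplified model of the physical->logical math after this change:
 * the remainder of the division by stripe_len (the offset inside the
 * stripe) is preserved and added to the resulting logical address.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t rmap_one_stripe(uint64_t chunk_start, uint64_t stripe_physical,
				uint64_t stripe_len, uint64_t io_stripe_size,
				uint64_t physical)
{
	uint64_t stripe_nr = (physical - stripe_physical) / stripe_len;
	uint64_t offset = (physical - stripe_physical) % stripe_len;

	/* The old code dropped 'offset'; the patched code adds it back in. */
	return chunk_start + stripe_nr * io_stripe_size + offset;
}

int main(void)
{
	/* 64K stripes; a super block copy sits 4K into the second stripe. */
	uint64_t logical = rmap_one_stripe(1ULL << 30, 1ULL << 20, 65536, 65536,
					   (1ULL << 20) + 65536 + 4096);

	printf("logical: %llu\n", (unsigned long long)logical);
	return 0;
}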
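The zone_unusable hunks in inc_block_group_ro() and btrfs_dec_block_group_ro() migrate a block group's unusable bytes into the space_info's bytes_readonly while the group is read-only, and move them back to bytes_zone_unusable when it becomes writable again. The following is a toy standalone model of just that migration, assuming the simplified structs shown here; it deliberately ignores the rest of the space_info bookkeeping and is not the kernel code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct sinfo { uint64_t bytes_readonly, bytes_zone_unusable; };
struct bg    { uint64_t zone_unusable, alloc_offset, used; };

/* Mirrors the zoned branch added to inc_block_group_ro(). */
static void set_ro(struct sinfo *s, struct bg *b, bool zoned)
{
	if (zoned) {
		s->bytes_readonly += b->zone_unusable;
		s->bytes_zone_unusable -= b->zone_unusable;
		b->zone_unusable = 0;
	}
}

/* Mirrors the zoned branch added to btrfs_dec_block_group_ro(). */
static void clear_ro(struct sinfo *s, struct bg *b, bool zoned)
{
	if (zoned) {
		/* Everything written but no longer "used" is unusable again. */
		b->zone_unusable = b->alloc_offset - b->used;
		s->bytes_zone_unusable += b->zone_unusable;
		s->bytes_readonly -= b->zone_unusable;
	}
}

int main(void)
{
	struct sinfo s = { .bytes_readonly = 0, .bytes_zone_unusable = 4096 };
	struct bg b = { .zone_unusable = 4096, .alloc_offset = 8192, .used = 4096 };

	set_ro(&s, &b, true);
	assert(s.bytes_readonly == 4096 && s.bytes_zone_unusable == 0);

	clear_ro(&s, &b, true);
	assert(s.bytes_zone_unusable == 4096 && s.bytes_readonly == 0);
	return 0;
}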