diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-group.c | 65 | ||||
-rw-r--r-- | fs/btrfs/block-group.h | 9 | ||||
-rw-r--r-- | fs/btrfs/block-rsv.c | 5 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 7 | ||||
-rw-r--r-- | fs/btrfs/free-space-tree.c | 24 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 77 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 1 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 11 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 10 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 17 | ||||
-rw-r--r-- | fs/btrfs/zoned.c | 3 |
11 files changed, 154 insertions, 75 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 48ae509f2ac2..030ab44fce18 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -499,12 +499,16 @@ static void fragment_free_space(struct btrfs_block_group *block_group) * used yet since their free space will be released as soon as the transaction * commits. */ -u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end) +int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end, + u64 *total_added_ret) { struct btrfs_fs_info *info = block_group->fs_info; - u64 extent_start, extent_end, size, total_added = 0; + u64 extent_start, extent_end, size; int ret; + if (total_added_ret) + *total_added_ret = 0; + while (start < end) { ret = find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, @@ -517,10 +521,12 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end start = extent_end + 1; } else if (extent_start > start && extent_start < end) { size = extent_start - start; - total_added += size; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + if (ret) + return ret; + if (total_added_ret) + *total_added_ret += size; start = extent_end + 1; } else { break; @@ -529,13 +535,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end if (start < end) { size = end - start; - total_added += size; ret = btrfs_add_free_space_async_trimmed(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + if (ret) + return ret; + if (total_added_ret) + *total_added_ret += size; } - return total_added; + return 0; } /* @@ -779,8 +787,13 @@ next: if (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY) { - total_found += add_new_free_space(block_group, last, - key.objectid); + u64 space_added; + + ret = add_new_free_space(block_group, last, key.objectid, + &space_added); + if (ret) + goto out; + total_found += space_added; if (key.type == BTRFS_METADATA_ITEM_KEY) last = key.objectid + fs_info->nodesize; @@ -795,11 +808,10 @@ next: } path->slots[0]++; } - ret = 0; - - total_found += add_new_free_space(block_group, last, - block_group->start + block_group->length); + ret = add_new_free_space(block_group, last, + block_group->start + block_group->length, + NULL); out: btrfs_free_path(path); return ret; @@ -1640,13 +1652,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; - trace_btrfs_add_unused_block_group(bg); spin_lock(&fs_info->unused_bgs_lock); if (list_empty(&bg->bg_list)) { btrfs_get_block_group(bg); + trace_btrfs_add_unused_block_group(bg); list_add_tail(&bg->bg_list, &fs_info->unused_bgs); - } else { + } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) { /* Pull out the block group from the reclaim_bgs list. */ + trace_btrfs_add_unused_block_group(bg); list_move_tail(&bg->bg_list, &fs_info->unused_bgs); } spin_unlock(&fs_info->unused_bgs_lock); @@ -2087,6 +2100,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) /* Shouldn't have super stripes in sequential zones */ if (zoned && nr) { + kfree(logical); btrfs_err(fs_info, "zoned: block group %llu must not contain super block", cache->start); @@ -2292,9 +2306,11 @@ static int read_one_block_group(struct btrfs_fs_info *info, btrfs_free_excluded_extents(cache); } else if (cache->used == 0) { cache->cached = BTRFS_CACHE_FINISHED; - add_new_free_space(cache, cache->start, - cache->start + cache->length); + ret = add_new_free_space(cache, cache->start, + cache->start + cache->length, NULL); btrfs_free_excluded_extents(cache); + if (ret) + goto error; } ret = btrfs_add_block_group_cache(info, cache); @@ -2668,6 +2684,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) next: btrfs_delayed_refs_rsv_release(fs_info, 1); list_del_init(&block_group->bg_list); + clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); } btrfs_trans_release_chunk_metadata(trans); } @@ -2707,6 +2724,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran if (!cache) return ERR_PTR(-ENOMEM); + /* + * Mark it as new before adding it to the rbtree of block groups or any + * list, so that no other task finds it and calls btrfs_mark_bg_unused() + * before the new flag is set. + */ + set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags); + cache->length = size; set_free_space_tree_thresholds(cache); cache->flags = type; @@ -2730,9 +2754,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } - add_new_free_space(cache, chunk_offset, chunk_offset + size); - + ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL); btrfs_free_excluded_extents(cache); + if (ret) { + btrfs_put_block_group(cache); + return ERR_PTR(ret); + } /* * Ensure the corresponding space_info object is created and diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index f204addc3fe8..aba5dff66c19 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -70,6 +70,11 @@ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, + /* + * Indicate that block group is in the list of new block groups of a + * transaction. + */ + BLOCK_GROUP_FLAG_NEW, }; enum btrfs_caching_type { @@ -284,8 +289,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); void btrfs_put_caching_control(struct btrfs_caching_control *ctl); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); -u64 add_new_free_space(struct btrfs_block_group *block_group, - u64 start, u64 end); +int add_new_free_space(struct btrfs_block_group *block_group, + u64 start, u64 end, u64 *total_added_ret); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 6279d200cf83..77684c5e0c8b 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -349,6 +349,11 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info) } read_unlock(&fs_info->global_root_lock); + if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) { + num_bytes += btrfs_root_used(&fs_info->block_group_root->root_item); + min_items++; + } + /* * But we also want to reserve enough space so we can do the fallback * global reserve for an unlink, which is an additional diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7513388b0567..9b9914e5f03d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3438,11 +3438,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device * For devices supporting discard turn on discard=async automatically, * unless it's already set or disabled. This could be turned off by * nodiscard for the same mount. + * + * The zoned mode piggy backs on the discard functionality for + * resetting a zone. There is no reason to delay the zone reset as it is + * fast enough. So, do not enable async discard for zoned mode. */ if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) || btrfs_test_opt(fs_info, DISCARD_ASYNC) || btrfs_test_opt(fs_info, NODISCARD)) && - fs_info->fs_devices->discardable) { + fs_info->fs_devices->discardable && + !btrfs_is_zoned(fs_info)) { btrfs_set_and_info(fs_info, DISCARD_ASYNC, "auto enabling async discard"); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 045ddce32eca..f169378e2ca6 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1515,9 +1515,13 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, if (prev_bit == 0 && bit == 1) { extent_start = offset; } else if (prev_bit == 1 && bit == 0) { - total_found += add_new_free_space(block_group, - extent_start, - offset); + u64 space_added; + + ret = add_new_free_space(block_group, extent_start, + offset, &space_added); + if (ret) + goto out; + total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; wake_up(&caching_ctl->wait); @@ -1529,8 +1533,9 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, } } if (prev_bit == 1) { - total_found += add_new_free_space(block_group, extent_start, - end); + ret = add_new_free_space(block_group, extent_start, end, NULL); + if (ret) + goto out; extent_count++; } @@ -1569,6 +1574,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, end = block_group->start + block_group->length; while (1) { + u64 space_added; + ret = btrfs_next_item(root, path); if (ret < 0) goto out; @@ -1583,8 +1590,11 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); ASSERT(key.objectid < end && key.objectid + key.offset <= end); - total_found += add_new_free_space(block_group, key.objectid, - key.objectid + key.offset); + ret = add_new_free_space(block_group, key.objectid, + key.objectid + key.offset, &space_added); + if (ret) + goto out; + total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; wake_up(&caching_ctl->wait); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dbbb67293e34..49cef61f6a39 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3482,15 +3482,21 @@ zeroit: void btrfs_add_delayed_iput(struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = inode->root->fs_info; + unsigned long flags; if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1)) return; atomic_inc(&fs_info->nr_delayed_iputs); - spin_lock(&fs_info->delayed_iput_lock); + /* + * Need to be irq safe here because we can be called from either an irq + * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq + * context. + */ + spin_lock_irqsave(&fs_info->delayed_iput_lock, flags); ASSERT(list_empty(&inode->delayed_iput)); list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags); if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) wake_up_process(fs_info->cleaner_kthread); } @@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { list_del_init(&inode->delayed_iput); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); iput(&inode->vfs_inode); if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) wake_up(&fs_info->delayed_iputs_wait); - spin_lock(&fs_info->delayed_iput_lock); + spin_lock_irq(&fs_info->delayed_iput_lock); } static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { if (!list_empty(&inode->delayed_iput)) { - spin_lock(&fs_info->delayed_iput_lock); + spin_lock_irq(&fs_info->delayed_iput_lock); if (!list_empty(&inode->delayed_iput)) run_delayed_iput_locked(fs_info, inode); - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); } } void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) { - - spin_lock(&fs_info->delayed_iput_lock); + /* + * btrfs_put_ordered_extent() can run in irq context (see bio.c), which + * calls btrfs_add_delayed_iput() and that needs to lock + * fs_info->delayed_iput_lock. So we need to disable irqs here to + * prevent a deadlock. + */ + spin_lock_irq(&fs_info->delayed_iput_lock); while (!list_empty(&fs_info->delayed_iputs)) { struct btrfs_inode *inode; inode = list_first_entry(&fs_info->delayed_iputs, struct btrfs_inode, delayed_iput); run_delayed_iput_locked(fs_info, inode); - cond_resched_lock(&fs_info->delayed_iput_lock); + if (need_resched()) { + spin_unlock_irq(&fs_info->delayed_iput_lock); + cond_resched(); + spin_lock_irq(&fs_info->delayed_iput_lock); + } } - spin_unlock(&fs_info->delayed_iput_lock); + spin_unlock_irq(&fs_info->delayed_iput_lock); } /* @@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(fs_info->sb, last_objectid, root); - ret = PTR_ERR_OR_ZERO(inode); - if (ret && ret != -ENOENT) - goto out; + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; + if (ret != -ENOENT) + goto out; + } - if (ret == -ENOENT && root == fs_info->tree_root) { + if (!inode && root == fs_info->tree_root) { struct btrfs_root *dead_root; int is_dead_root = 0; @@ -3724,17 +3742,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * deleted but wasn't. The inode number may have been reused, * but either way, we can delete the orphan item. */ - if (ret == -ENOENT || inode->i_nlink) { - if (!ret) { + if (!inode || inode->i_nlink) { + if (inode) { ret = btrfs_drop_verity_items(BTRFS_I(inode)); iput(inode); + inode = NULL; if (ret) goto out; } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - iput(inode); goto out; } btrfs_debug(fs_info, "auto deleting %Lu", @@ -3742,10 +3760,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = btrfs_del_orphan_item(trans, root, found_key.objectid); btrfs_end_transaction(trans); - if (ret) { - iput(inode); + if (ret) goto out; - } continue; } @@ -4847,9 +4863,6 @@ again: ret = -ENOMEM; goto out; } - ret = set_page_extent_mapped(page); - if (ret < 0) - goto out_unlock; if (!PageUptodate(page)) { ret = btrfs_read_folio(NULL, page_folio(page)); @@ -4864,6 +4877,17 @@ again: goto out_unlock; } } + + /* + * We unlock the page after the io is completed and then re-lock it + * above. release_folio() could have come in between that and cleared + * PagePrivate(), but left the page in the mapping. Set the page mapped + * here to make sure it's properly set for the subpage stuff. + */ + ret = set_page_extent_mapped(page); + if (ret < 0) + goto out_unlock; + wait_on_page_writeback(page); lock_extent(io_tree, block_start, block_end, &cached_state); @@ -7849,8 +7873,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio, ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered); if (ret) { - bbio->bio.bi_status = errno_to_blk_status(ret); - btrfs_dio_end_io(bbio); + btrfs_finish_ordered_extent(dio_data->ordered, NULL, + file_offset, dip->bytes, + !ret); + bio->bi_status = errno_to_blk_status(ret); + iomap_dio_bio_end_io(bio); return; } } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index da1f84a0eb29..2637d6b157ff 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4445,4 +4445,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) ulist_free(entry->old_roots); kfree(entry); } + *root = RB_ROOT; } diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f37b925d587f..0249ea52bb80 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work); static void index_rbio_pages(struct btrfs_raid_bio *rbio); static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); -static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check); +static int finish_parity_scrub(struct btrfs_raid_bio *rbio); static void scrub_rbio_work_locked(struct work_struct *work); static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio) @@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) return 0; } -static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) +static int finish_parity_scrub(struct btrfs_raid_bio *rbio) { struct btrfs_io_context *bioc = rbio->bioc; const u32 sectorsize = bioc->fs_info->sectorsize; @@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) */ clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); - if (!need_check) - goto writeback; - p_sector.page = alloc_page(GFP_NOFS); if (!p_sector.page) return -ENOMEM; @@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) q_sector.page = NULL; } -writeback: /* * time to start writing. Make bios for everything from the * higher layers (the bio_list in our rbio) and our p/q. Ignore @@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio) static void scrub_rbio(struct btrfs_raid_bio *rbio) { - bool need_check = false; int sector_nr; int ret; @@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio) * We have every sector properly prepared. Can finish the scrub * and writeback the good content. */ - ret = finish_parity_scrub(rbio, need_check); + ret = finish_parity_scrub(rbio); wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int found_errors; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cf306351b148..91b6c2fdc420 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -826,8 +826,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) trans = start_transaction(root, 0, TRANS_ATTACH, BTRFS_RESERVE_NO_FLUSH, true); - if (trans == ERR_PTR(-ENOENT)) - btrfs_wait_for_commit(root->fs_info, 0); + if (trans == ERR_PTR(-ENOENT)) { + int ret; + + ret = btrfs_wait_for_commit(root->fs_info, 0); + if (ret) + return ERR_PTR(ret); + } return trans; } @@ -931,6 +936,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid) } wait_for_commit(cur_trans, TRANS_STATE_COMPLETED); + ret = cur_trans->aborted; btrfs_put_transaction(cur_trans); out: return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 73f9ea7672db..2ecb76cf3d91 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4078,14 +4078,6 @@ static int alloc_profile_is_valid(u64 flags, int extended) return has_single_bit_set(flags); } -static inline int balance_need_close(struct btrfs_fs_info *fs_info) -{ - /* cancel requested || normal exit path */ - return atomic_read(&fs_info->balance_cancel_req) || - (atomic_read(&fs_info->balance_pause_req) == 0 && - atomic_read(&fs_info->balance_cancel_req) == 0); -} - /* * Validate target profile against allowed profiles and return true if it's OK. * Otherwise print the error message and return false. @@ -4275,6 +4267,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, u64 num_devices; unsigned seq; bool reducing_redundancy; + bool paused = false; int i; if (btrfs_fs_closing(fs_info) || @@ -4405,6 +4398,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) { btrfs_info(fs_info, "balance: paused"); btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED); + paused = true; } /* * Balance can be canceled by: @@ -4433,8 +4427,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, btrfs_update_ioctl_balance_args(fs_info, bargs); } - if ((ret && ret != -ECANCELED && ret != -ENOSPC) || - balance_need_close(fs_info)) { + /* We didn't pause, we can clean everything up. */ + if (!paused) { reset_balance_state(fs_info); btrfs_exclop_finish(fs_info); } @@ -6404,7 +6398,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, (op == BTRFS_MAP_READ || !dev_replace_is_ongoing || !dev_replace->tgtdev)) { set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr); - *mirror_num_ret = mirror_num; + if (mirror_num_ret) + *mirror_num_ret = mirror_num; *bioc_ret = NULL; ret = 0; goto out; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 85b8b332add9..72b90bc19a19 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -805,6 +805,9 @@ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info) return -EINVAL; } + btrfs_clear_and_info(info, DISCARD_ASYNC, + "zoned: async discard ignored and disabled for zoned mode"); + return 0; } |