diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-rsv.c | 2 | ||||
-rw-r--r-- | fs/btrfs/block-rsv.h | 32 | ||||
-rw-r--r-- | fs/btrfs/defrag.c | 2 | ||||
-rw-r--r-- | fs/btrfs/dev-replace.c | 38 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 22 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 2 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 186 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 22 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 18 | ||||
-rw-r--r-- | fs/btrfs/send.c | 17 | ||||
-rw-r--r-- | fs/btrfs/space-info.c | 26 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 2 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 92 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 4 | ||||
-rw-r--r-- | fs/btrfs/zoned.c | 44 |
15 files changed, 361 insertions, 148 deletions
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index ceb5f586a2d5..1043a8142351 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -494,7 +494,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); - if (unlikely(block_rsv->size == 0)) + if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve; again: ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index b0bd12b8652f..43a9a6b5a79f 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -101,4 +101,36 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv) return data_race(rsv->full); } +/* + * Get the reserved mount of a block reserve in a context where getting a stale + * value is acceptable, instead of accessing it directly and trigger data race + * warning from KCSAN. + */ +static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->reserved; + spin_unlock(&rsv->lock); + + return ret; +} + +/* + * Get the size of a block reserve in a context where getting a stale value is + * acceptable, instead of accessing it directly and trigger data race warning + * from KCSAN. + */ +static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->size; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index c276b136ab63..5b0b64571418 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto add; /* Skip too large extent */ - if (range_len >= extent_thresh) + if (em->len >= extent_thresh) goto next; /* diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 1502d664c892..fb33027e5a4c 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -246,7 +246,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct block_device *bdev; u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; @@ -257,13 +257,13 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return -EINVAL; } - bdev_handle = bdev_open_by_path(device_path, BLK_OPEN_WRITE, + bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, fs_info->bdev_holder, NULL); - if (IS_ERR(bdev_handle)) { + if (IS_ERR(bdev_file)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); - return PTR_ERR(bdev_handle); + return PTR_ERR(bdev_file); } - bdev = bdev_handle->bdev; + bdev = file_bdev(bdev_file); if (!btrfs_check_device_zone_type(fs_info, bdev)) { btrfs_err(fs_info, @@ -314,7 +314,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->commit_bytes_used = device->bytes_used; device->fs_info = fs_info; device->bdev = bdev; - device->bdev_handle = bdev_handle; + device->bdev_file = bdev_file; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); device->dev_stats_valid = 1; @@ -335,7 +335,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return 0; error: - bdev_release(bdev_handle); + fput(bdev_file); return ret; } @@ -725,6 +725,23 @@ leave: return ret; } +static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) +{ + if (args->start.srcdevid == 0) { + if (memchr(args->start.srcdev_name, 0, + sizeof(args->start.srcdev_name)) == NULL) + return -ENAMETOOLONG; + } else { + args->start.srcdev_name[0] = 0; + } + + if (memchr(args->start.tgtdev_name, 0, + sizeof(args->start.tgtdev_name)) == NULL) + return -ENAMETOOLONG; + + return 0; +} + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { @@ -737,10 +754,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, default: return -EINVAL; } - - if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || - args->start.tgtdev_name[0] == '\0') - return -EINVAL; + ret = btrfs_check_replace_dev_names(args); + if (ret < 0) + return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e71ef97d0a7c..c843563914ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, - * pass 0 for new allocation. + * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; @@ -1342,9 +1342,9 @@ again: * that common but still possible. In that case, we just need * to free the anon_dev. */ - if (unlikely(anon_dev)) { - free_anon_bdev(anon_dev); - anon_dev = 0; + if (unlikely(anon_dev && *anon_dev)) { + free_anon_bdev(*anon_dev); + *anon_dev = 0; } if (check_ref && btrfs_root_refs(&root->root_item) == 0) { @@ -1366,7 +1366,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; @@ -1402,7 +1402,7 @@ fail: * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ - if (anon_dev) + if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); @@ -1418,7 +1418,7 @@ fail: struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * the anonymous block device id * * @objectid: tree objectid - * @anon_dev: if zero, allocate a new anonymous block device or use the - * parameter value + * @anon_dev: if NULL, allocate a new anonymous block device or use the + * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 9413726b329b..eb3473d1c1ac 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cfd2967f04a2..8b4bef05e222 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache, u64 offset, u64 phys, u64 len, u32 flags) { + u64 cache_end; int ret = 0; /* Set at the end of extent_fiemap(). */ @@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, goto assign; /* - * Sanity check, extent_fiemap() should have ensured that new - * fiemap extent won't overlap with cached one. - * Not recoverable. + * When iterating the extents of the inode, at extent_fiemap(), we may + * find an extent that starts at an offset behind the end offset of the + * previous extent we processed. This happens if fiemap is called + * without FIEMAP_FLAG_SYNC and there are ordered extents completing + * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()). * - * NOTE: Physical address can overlap, due to compression + * For example we are in leaf X processing its last item, which is the + * file extent item for file range [512K, 1M[, and after + * btrfs_next_leaf() releases the path, there's an ordered extent that + * completes for the file range [768K, 2M[, and that results in trimming + * the file extent item so that it now corresponds to the file range + * [512K, 768K[ and a new file extent item is inserted for the file + * range [768K, 2M[, which may end up as the last item of leaf X or as + * the first item of the next leaf - in either case btrfs_next_leaf() + * will leave us with a path pointing to the new extent item, for the + * file range [768K, 2M[, since that's the first key that follows the + * last one we processed. So in order not to report overlapping extents + * to user space, we trim the length of the previously cached extent and + * emit it. + * + * Upon calling btrfs_next_leaf() we may also find an extent with an + * offset smaller than or equals to cache->offset, and this happens + * when we had a hole or prealloc extent with several delalloc ranges in + * it, but after btrfs_next_leaf() released the path, delalloc was + * flushed and the resulting ordered extents were completed, so we can + * now have found a file extent item for an offset that is smaller than + * or equals to what we have in cache->offset. We deal with this as + * described below. */ - if (cache->offset + cache->len > offset) { - WARN_ON(1); - return -EINVAL; + cache_end = cache->offset + cache->len; + if (cache_end > offset) { + if (offset == cache->offset) { + /* + * We cached a dealloc range (found in the io tree) for + * a hole or prealloc extent and we have now found a + * file extent item for the same offset. What we have + * now is more recent and up to date, so discard what + * we had in the cache and use what we have just found. + */ + goto assign; + } else if (offset > cache->offset) { + /* + * The extent range we previously found ends after the + * offset of the file extent item we found and that + * offset falls somewhere in the middle of that previous + * extent range. So adjust the range we previously found + * to end at the offset of the file extent item we have + * just found, since this extent is more up to date. + * Emit that adjusted range and cache the file extent + * item we have just found. This corresponds to the case + * where a previously found file extent item was split + * due to an ordered extent completing. + */ + cache->len = offset - cache->offset; + goto emit; + } else { + const u64 range_end = offset + len; + + /* + * The offset of the file extent item we have just found + * is behind the cached offset. This means we were + * processing a hole or prealloc extent for which we + * have found delalloc ranges (in the io tree), so what + * we have in the cache is the last delalloc range we + * found while the file extent item we found can be + * either for a whole delalloc range we previously + * emmitted or only a part of that range. + * + * We have two cases here: + * + * 1) The file extent item's range ends at or behind the + * cached extent's end. In this case just ignore the + * current file extent item because we don't want to + * overlap with previous ranges that may have been + * emmitted already; + * + * 2) The file extent item starts behind the currently + * cached extent but its end offset goes beyond the + * end offset of the cached extent. We don't want to + * overlap with a previous range that may have been + * emmitted already, so we emit the currently cached + * extent and then partially store the current file + * extent item's range in the cache, for the subrange + * going the cached extent's end to the end of the + * file extent item. + */ + if (range_end <= cache_end) + return 0; + + if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC))) + phys += cache_end - offset; + + offset = cache_end; + len = range_end - cache_end; + goto emit; + } } /* @@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, return 0; } +emit: /* Not mergeable, need to submit cached one */ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, cache->len, cache->flags); @@ -2689,16 +2778,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode, * it beyond i_size. */ while (cur_offset < end && cur_offset < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; u64 prealloc_start; + u64 lockstart; + u64 lockend; u64 prealloc_len = 0; bool delalloc; + lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); + lockend = round_up(end, inode->root->fs_info->sectorsize); + + /* + * We are only locking for the delalloc range because that's the + * only thing that can change here. With fiemap we have a lock + * on the inode, so no buffered or direct writes can happen. + * + * However mmaps and normal page writeback will cause this to + * change arbitrarily. We have to lock the extent lock here to + * make sure that nobody messes with the tree while we're doing + * btrfs_find_delalloc_in_range. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) break; @@ -2866,15 +2973,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { const u64 ino = btrfs_ino(inode); - struct extent_state *cached_state = NULL; struct extent_state *delalloc_cached_state = NULL; struct btrfs_path *path; struct fiemap_cache cache = { 0 }; struct btrfs_backref_share_check_ctx *backref_ctx; u64 last_extent_end; u64 prev_extent_end; - u64 lockstart; - u64 lockend; + u64 range_start; + u64 range_end; + const u64 sectorsize = inode->root->fs_info->sectorsize; bool stopped = false; int ret; @@ -2885,22 +2992,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - lockstart = round_down(start, inode->root->fs_info->sectorsize); - lockend = round_up(start + len, inode->root->fs_info->sectorsize); - prev_extent_end = lockstart; - - btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + range_start = round_down(start, sectorsize); + range_end = round_up(start + len, sectorsize); + prev_extent_end = range_start; ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) - goto out_unlock; + goto out; btrfs_release_path(path); path->reada = READA_FORWARD; - ret = fiemap_search_slot(inode, path, lockstart); + ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* * No file extent item found, but we may have delalloc between @@ -2910,7 +3014,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto check_eof_delalloc; } - while (prev_extent_end < lockend) { + while (prev_extent_end < range_end) { struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *ei; struct btrfs_key key; @@ -2933,21 +3037,21 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, * The first iteration can leave us at an extent item that ends * before our range's start. Move to the next item. */ - if (extent_end <= lockstart) + if (extent_end <= range_start) goto next_item; backref_ctx->curr_leaf_bytenr = leaf->start; /* We have in implicit hole (NO_HOLES feature enabled). */ if (prev_extent_end < key.offset) { - const u64 range_end = min(key.offset, lockend) - 1; + const u64 hole_end = min(key.offset, range_end) - 1; ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, - prev_extent_end, range_end); + prev_extent_end, hole_end); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -2955,7 +3059,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } /* We've reached the end of the fiemap range, stop. */ - if (key.offset >= lockend) { + if (key.offset >= range_end) { stopped = true; break; } @@ -3003,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, extent_gen, backref_ctx); if (ret < 0) - goto out_unlock; + goto out; else if (ret > 0) flags |= FIEMAP_EXTENT_SHARED; } @@ -3014,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3025,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, next_item: if (fatal_signal_pending(current)) { ret = -EINTR; - goto out_unlock; + goto out; } ret = fiemap_next_leaf_item(inode, path); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* No more file extent items for this inode. */ break; @@ -3049,29 +3153,41 @@ check_eof_delalloc: btrfs_free_path(path); path = NULL; - if (!stopped && prev_extent_end < lockend) { + if (!stopped && prev_extent_end < range_end) { ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, - 0, 0, 0, prev_extent_end, lockend - 1); + 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) - goto out_unlock; - prev_extent_end = lockend; + goto out; + prev_extent_end = range_end; } if (cache.cached && cache.offset + cache.len >= last_extent_end) { const u64 i_size = i_size_read(&inode->vfs_inode); if (prev_extent_end < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; + u64 lockstart; + u64 lockend; bool delalloc; + lockstart = round_down(prev_extent_end, sectorsize); + lockend = round_up(i_size, sectorsize); + + /* + * See the comment in fiemap_process_hole as to why + * we're doing the locking here. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, prev_extent_end, i_size - 1, &delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) cache.flags |= FIEMAP_EXTENT_LAST; } else { @@ -3080,10 +3196,6 @@ check_eof_delalloc: } ret = emit_last_fiemap_cache(fieinfo, &cache); - -out_unlock: - unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); - btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); btrfs_free_backref_share_ctx(backref_ctx); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f88e0ca8331d..4795738d5785 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); int ret; ret = fiemap_prep(inode, fieinfo, start, &len, 0); @@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return ret; } - return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); + btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); + + /* + * We did an initial flush to avoid holding the inode's lock while + * triggering writeback and waiting for the completion of IO and ordered + * extents. Now after we locked the inode we do it again, because it's + * possible a new write may have happened in between those two steps. + */ + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { + ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX); + if (ret) { + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + return ret; + } + } + + ret = extent_fiemap(btrfs_inode, fieinfo, start, len); + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + + return ret; } static int btrfs_writepages(struct address_space *mapping, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ac3316e0d11c..9876ee27f069 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); @@ -2698,7 +2698,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args_v2 *vol_args; - struct bdev_handle *bdev_handle = NULL; + struct file *bdev_file = NULL; int ret; bool cancel = false; @@ -2735,7 +2735,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) goto err_drop; /* Exclusive operation is now claimed */ - ret = btrfs_rm_device(fs_info, &args, &bdev_handle); + ret = btrfs_rm_device(fs_info, &args, &bdev_file); btrfs_exclop_finish(fs_info); @@ -2749,8 +2749,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) } err_drop: mnt_drop_write_file(file); - if (bdev_handle) - bdev_release(bdev_handle); + if (bdev_file) + fput(bdev_file); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); @@ -2763,7 +2763,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args *vol_args; - struct bdev_handle *bdev_handle = NULL; + struct file *bdev_file = NULL; int ret; bool cancel = false; @@ -2790,15 +2790,15 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret == 0) { - ret = btrfs_rm_device(fs_info, &args, &bdev_handle); + ret = btrfs_rm_device(fs_info, &args, &bdev_file); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_exclop_finish(fs_info); } mnt_drop_write_file(file); - if (bdev_handle) - bdev_release(bdev_handle); + if (bdev_file) + fput(bdev_file); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7902298c1f25..e48a063ef085 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6705,11 +6705,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret) goto out; } - if (sctx->cur_inode_last_extent < - sctx->cur_inode_size) { - ret = send_hole(sctx, sctx->cur_inode_size); - if (ret) + if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + sctx->cur_inode_size); + if (ret < 0) { goto out; + } else if (ret == 0) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret < 0) + goto out; + } else { + /* Range is already a hole, skip. */ + ret = 0; + } } } if (need_truncate) { diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 571bb13587d5..3b54eb583474 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -856,7 +856,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info) { - u64 global_rsv_size = fs_info->global_block_rsv.reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv); u64 ordered, delalloc; u64 thresh; u64 used; @@ -956,8 +956,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1; delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes); if (ordered >= delalloc) - used += fs_info->delayed_refs_rsv.reserved + - fs_info->delayed_block_rsv.reserved; + used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) + + btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv); else used += space_info->bytes_may_use - global_rsv_size; @@ -1173,7 +1173,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) enum btrfs_flush_state flush; u64 delalloc_size = 0; u64 to_reclaim, block_rsv_size; - u64 global_rsv_size = global_rsv->reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv); loops++; @@ -1185,9 +1185,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) * assume it's tied up in delalloc reservations. */ block_rsv_size = global_rsv_size + - delayed_block_rsv->reserved + - delayed_refs_rsv->reserved + - trans_rsv->reserved; + btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv) + + btrfs_block_rsv_reserved(trans_rsv); if (block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; @@ -1207,16 +1207,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) to_reclaim = delalloc_size; flush = FLUSH_DELALLOC; } else if (space_info->bytes_pinned > - (delayed_block_rsv->reserved + - delayed_refs_rsv->reserved)) { + (btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv))) { to_reclaim = space_info->bytes_pinned; flush = COMMIT_TRANS; - } else if (delayed_block_rsv->reserved > - delayed_refs_rsv->reserved) { - to_reclaim = delayed_block_rsv->reserved; + } else if (btrfs_block_rsv_reserved(delayed_block_rsv) > + btrfs_block_rsv_reserved(delayed_refs_rsv)) { + to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv); flush = FLUSH_DELAYED_ITEMS_NR; } else { - to_reclaim = delayed_refs_rsv->reserved; + to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv); flush = FLUSH_DELAYED_REFS_NR; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c52807d97efa..bf8e64c766b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d67785be2c77..e180da4cc227 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -468,39 +468,39 @@ static noinline struct btrfs_fs_devices *find_fsid( static int btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, - int flush, struct bdev_handle **bdev_handle, + int flush, struct file **bdev_file, struct btrfs_super_block **disk_super) { struct block_device *bdev; int ret; - *bdev_handle = bdev_open_by_path(device_path, flags, holder, NULL); + *bdev_file = bdev_file_open_by_path(device_path, flags, holder, NULL); - if (IS_ERR(*bdev_handle)) { - ret = PTR_ERR(*bdev_handle); + if (IS_ERR(*bdev_file)) { + ret = PTR_ERR(*bdev_file); goto error; } - bdev = (*bdev_handle)->bdev; + bdev = file_bdev(*bdev_file); if (flush) sync_blockdev(bdev); ret = set_blocksize(bdev, BTRFS_BDEV_BLOCKSIZE); if (ret) { - bdev_release(*bdev_handle); + fput(*bdev_file); goto error; } invalidate_bdev(bdev); *disk_super = btrfs_read_dev_super(bdev); if (IS_ERR(*disk_super)) { ret = PTR_ERR(*disk_super); - bdev_release(*bdev_handle); + fput(*bdev_file); goto error; } return 0; error: - *bdev_handle = NULL; + *bdev_file = NULL; return ret; } @@ -643,7 +643,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device, blk_mode_t flags, void *holder) { - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct btrfs_super_block *disk_super; u64 devid; int ret; @@ -654,7 +654,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, return -EINVAL; ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, - &bdev_handle, &disk_super); + &bdev_file, &disk_super); if (ret) return ret; @@ -678,20 +678,20 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); fs_devices->seeding = true; } else { - if (bdev_read_only(bdev_handle->bdev)) + if (bdev_read_only(file_bdev(bdev_file))) clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); else set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); } - if (!bdev_nonrot(bdev_handle->bdev)) + if (!bdev_nonrot(file_bdev(bdev_file))) fs_devices->rotating = true; - if (bdev_max_discard_sectors(bdev_handle->bdev)) + if (bdev_max_discard_sectors(file_bdev(bdev_file))) fs_devices->discardable = true; - device->bdev_handle = bdev_handle; - device->bdev = bdev_handle->bdev; + device->bdev_file = bdev_file; + device->bdev = file_bdev(bdev_file); clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); fs_devices->open_devices++; @@ -706,7 +706,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, error_free_page: btrfs_release_disk_super(disk_super); - bdev_release(bdev_handle); + fput(bdev_file); return -EINVAL; } @@ -1015,10 +1015,10 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, if (device->devid == BTRFS_DEV_REPLACE_DEVID) continue; - if (device->bdev_handle) { - bdev_release(device->bdev_handle); + if (device->bdev_file) { + fput(device->bdev_file); device->bdev = NULL; - device->bdev_handle = NULL; + device->bdev_file = NULL; fs_devices->open_devices--; } if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { @@ -1063,7 +1063,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) invalidate_bdev(device->bdev); } - bdev_release(device->bdev_handle); + fput(device->bdev_file); } static void btrfs_close_one_device(struct btrfs_device *device) @@ -1316,7 +1316,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, struct btrfs_super_block *disk_super; bool new_device_added = false; struct btrfs_device *device = NULL; - struct bdev_handle *bdev_handle; + struct file *bdev_file; u64 bytenr, bytenr_orig; int ret; @@ -1339,18 +1339,18 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, * values temporarily, as the device paths of the fsid are the only * required information for assembling the volume. */ - bdev_handle = bdev_open_by_path(path, flags, NULL, NULL); - if (IS_ERR(bdev_handle)) - return ERR_CAST(bdev_handle); + bdev_file = bdev_file_open_by_path(path, flags, NULL, NULL); + if (IS_ERR(bdev_file)) + return ERR_CAST(bdev_file); bytenr_orig = btrfs_sb_offset(0); - ret = btrfs_sb_log_location_bdev(bdev_handle->bdev, 0, READ, &bytenr); + ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr); if (ret) { device = ERR_PTR(ret); goto error_bdev_put; } - disk_super = btrfs_read_disk_super(bdev_handle->bdev, bytenr, + disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr, bytenr_orig); if (IS_ERR(disk_super)) { device = ERR_CAST(disk_super); @@ -1381,7 +1381,7 @@ free_disk_super: btrfs_release_disk_super(disk_super); error_bdev_put: - bdev_release(bdev_handle); + fput(bdev_file); return device; } @@ -2057,7 +2057,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct bdev_handle **bdev_handle) + struct file **bdev_file) { struct btrfs_trans_handle *trans; struct btrfs_device *device; @@ -2166,7 +2166,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, btrfs_assign_next_active_device(device, NULL); - if (device->bdev_handle) { + if (device->bdev_file) { cur_devices->open_devices--; /* remove sysfs entry */ btrfs_sysfs_remove_device(device); @@ -2182,9 +2182,9 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, * free the device. * * We cannot call btrfs_close_bdev() here because we're holding the sb - * write lock, and bdev_release() will pull in the ->open_mutex on - * the block device and it's dependencies. Instead just flush the - * device and let the caller do the final bdev_release. + * write lock, and fput() on the block device will pull in the + * ->open_mutex on the block device and it's dependencies. Instead + * just flush the device and let the caller do the final bdev_release. */ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { btrfs_scratch_superblocks(fs_info, device->bdev, @@ -2195,7 +2195,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, } } - *bdev_handle = device->bdev_handle; + *bdev_file = device->bdev_file; synchronize_rcu(); btrfs_free_device(device); @@ -2332,7 +2332,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, const char *path) { struct btrfs_super_block *disk_super; - struct bdev_handle *bdev_handle; + struct file *bdev_file; int ret; if (!path || !path[0]) @@ -2350,7 +2350,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, } ret = btrfs_get_bdev_and_sb(path, BLK_OPEN_READ, NULL, 0, - &bdev_handle, &disk_super); + &bdev_file, &disk_super); if (ret) { btrfs_put_dev_args_from_path(args); return ret; @@ -2363,7 +2363,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, else memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - bdev_release(bdev_handle); + fput(bdev_file); return 0; } @@ -2583,7 +2583,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path struct btrfs_root *root = fs_info->dev_root; struct btrfs_trans_handle *trans; struct btrfs_device *device; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct super_block *sb = fs_info->sb; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_fs_devices *seed_devices = NULL; @@ -2596,12 +2596,12 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (sb_rdonly(sb) && !fs_devices->seeding) return -EROFS; - bdev_handle = bdev_open_by_path(device_path, BLK_OPEN_WRITE, + bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, fs_info->bdev_holder, NULL); - if (IS_ERR(bdev_handle)) - return PTR_ERR(bdev_handle); + if (IS_ERR(bdev_file)) + return PTR_ERR(bdev_file); - if (!btrfs_check_device_zone_type(fs_info, bdev_handle->bdev)) { + if (!btrfs_check_device_zone_type(fs_info, file_bdev(bdev_file))) { ret = -EINVAL; goto error; } @@ -2613,11 +2613,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path locked = true; } - sync_blockdev(bdev_handle->bdev); + sync_blockdev(file_bdev(bdev_file)); rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { - if (device->bdev == bdev_handle->bdev) { + if (device->bdev == file_bdev(bdev_file)) { ret = -EEXIST; rcu_read_unlock(); goto error; @@ -2633,8 +2633,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } device->fs_info = fs_info; - device->bdev_handle = bdev_handle; - device->bdev = bdev_handle->bdev; + device->bdev_file = bdev_file; + device->bdev = file_bdev(bdev_file); ret = lookup_bdev(device_path, &device->devt); if (ret) goto error_free_device; @@ -2817,7 +2817,7 @@ error_free_zone: error_free_device: btrfs_free_device(device); error: - bdev_release(bdev_handle); + fput(bdev_file); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 53f87f398da7..a11854912d53 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -90,7 +90,7 @@ struct btrfs_device { u64 generation; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct block_device *bdev; struct btrfs_zoned_device_info *zone_info; @@ -661,7 +661,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct bdev_handle **bdev_handle); + struct file **bdev_file); void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3a5d69ff25fc..aea51fd850cd 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -824,11 +824,14 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, reset = &zones[1]; if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { + unsigned int nofs_flags; + ASSERT(sb_zone_is_full(reset)); + nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - reset->start, reset->len, - GFP_NOFS); + reset->start, reset->len); + memalloc_nofs_restore(nofs_flags); if (ret) return ret; @@ -974,11 +977,14 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror) * explicit ZONE_FINISH is not necessary. */ if (zone->wp != zone->start + zone->capacity) { + unsigned int nofs_flags; int ret; + nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, zone->start, - zone->len, GFP_NOFS); + zone->len); + memalloc_nofs_restore(nofs_flags); if (ret) return ret; } @@ -996,11 +1002,13 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror) int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) { + unsigned int nofs_flags; sector_t zone_sectors; sector_t nr_sectors; u8 zone_sectors_shift; u32 sb_zone; u32 nr_zones; + int ret; zone_sectors = bdev_zone_sectors(bdev); zone_sectors_shift = ilog2(zone_sectors); @@ -1011,9 +1019,12 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) if (sb_zone + 1 >= nr_zones) return -ENOENT; - return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - zone_start_sector(sb_zone, bdev), - zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS); + nofs_flags = memalloc_nofs_save(); + ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + zone_start_sector(sb_zone, bdev), + zone_sectors * BTRFS_NR_SB_LOG_ZONES); + memalloc_nofs_restore(nofs_flags); + return ret; } /* @@ -1124,12 +1135,14 @@ static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos) int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, u64 length, u64 *bytes) { + unsigned int nofs_flags; int ret; *bytes = 0; + nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, - physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT, - GFP_NOFS); + physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT); + memalloc_nofs_restore(nofs_flags); if (ret) return ret; @@ -1639,6 +1652,15 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } out: + /* Reject non SINGLE data profiles without RST */ + if ((map->type & BTRFS_BLOCK_GROUP_DATA) && + (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && + !fs_info->stripe_root) { + btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", + btrfs_bg_type_to_raid_name(map->type)); + return -EINVAL; + } + if (cache->alloc_offset > cache->zone_capacity) { btrfs_err(fs_info, "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", @@ -2235,14 +2257,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ struct btrfs_device *device = map->stripes[i].dev; const u64 physical = map->stripes[i].physical; struct btrfs_zoned_device_info *zinfo = device->zone_info; + unsigned int nofs_flags; if (zinfo->max_active_zones == 0) continue; + nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, physical >> SECTOR_SHIFT, - zinfo->zone_size >> SECTOR_SHIFT, - GFP_NOFS); + zinfo->zone_size >> SECTOR_SHIFT); + memalloc_nofs_restore(nofs_flags); if (ret) return ret; |