Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c	334
1 file changed, 205 insertions, 129 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaddd7225348..4e03a6d3aa32 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -241,7 +241,7 @@ int __init extent_io_init(void)
 		return -ENOMEM;
 
 	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
-			offsetof(struct btrfs_io_bio, bio),
+			offsetof(struct btrfs_bio, bio),
 			BIOSET_NEED_BVECS))
 		goto free_buffer_cache;
 
@@ -1975,10 +1975,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 
 /*
  * Find and lock a contiguous range of bytes in the file marked as delalloc, no
- * more than @max_bytes.  @Start and @end are used to return the range,
+ * more than @max_bytes.
  *
- * Return: true if we find something
- *         false if nothing was in the tree
+ * @start:	The original start bytenr to search.
+ *		Will store the extent range start bytenr.
+ * @end:	The original end bytenr of the search range
+ *		Will store the extent range end bytenr.
+ *
+ * Return true if we find a delalloc range which starts inside the original
+ * range, and @start/@end will store the delalloc range start/end.
+ *
+ * Return false if we can't find any delalloc range which starts inside the
+ * original range, and @start/@end will be the non-delalloc range start/end.
  */
 EXPORT_FOR_TESTS
 noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
@@ -1986,6 +1994,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
 				    u64 *end)
 {
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+	const u64 orig_start = *start;
+	const u64 orig_end = *end;
 	u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
 	u64 delalloc_start;
 	u64 delalloc_end;
@@ -1994,15 +2004,23 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
 	int ret;
 	int loops = 0;
 
+	/* Caller should pass a valid @end to indicate the search range end */
+	ASSERT(orig_end > orig_start);
+
+	/* The range should at least cover part of the page */
+	ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
+		 orig_end <= page_offset(locked_page)));
 again:
 	/* step one, find a bunch of delalloc bytes starting at start */
 	delalloc_start = *start;
 	delalloc_end = 0;
 	found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
 					  max_bytes, &cached_state);
-	if (!found || delalloc_end <= *start) {
+	if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
 		*start = delalloc_start;
-		*end = delalloc_end;
+
+		/* @delalloc_end can be -1, never go beyond @orig_end */
+		*end = min(delalloc_end, orig_end);
 		free_extent_state(cached_state);
 		return false;
 	}
@@ -2282,15 +2300,15 @@ int free_io_failure(struct extent_io_tree *failure_tree,
  * currently, there can be no more than two copies of every data bit. thus,
  * exactly one rewrite is required.
  */
-int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-		      u64 length, u64 logical, struct page *page,
-		      unsigned int pg_offset, int mirror_num)
+static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+			     u64 length, u64 logical, struct page *page,
+			     unsigned int pg_offset, int mirror_num)
 {
 	struct bio *bio;
 	struct btrfs_device *dev;
 	u64 map_length = 0;
 	u64 sector;
-	struct btrfs_bio *bbio = NULL;
+	struct btrfs_io_context *bioc = NULL;
 	int ret;
 
 	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
@@ -2299,12 +2317,12 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 	if (btrfs_is_zoned(fs_info))
 		return btrfs_repair_one_zone(fs_info, logical);
 
-	bio = btrfs_io_bio_alloc(1);
+	bio = btrfs_bio_alloc(1);
 	bio->bi_iter.bi_size = 0;
 	map_length = length;
 
 	/*
-	 * Avoid races with device replace and make sure our bbio has devices
+	 * Avoid races with device replace and make sure our bioc has devices
 	 * associated to its stripes that don't go away while we are doing the
 	 * read repair operation.
 	 */
@@ -2317,28 +2335,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 		 * stripe's dev and sector.
 		 */
 		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
-				      &map_length, &bbio, 0);
+				      &map_length, &bioc, 0);
 		if (ret) {
 			btrfs_bio_counter_dec(fs_info);
 			bio_put(bio);
 			return -EIO;
 		}
-		ASSERT(bbio->mirror_num == 1);
+		ASSERT(bioc->mirror_num == 1);
 	} else {
 		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
-				      &map_length, &bbio, mirror_num);
+				      &map_length, &bioc, mirror_num);
 		if (ret) {
 			btrfs_bio_counter_dec(fs_info);
 			bio_put(bio);
 			return -EIO;
 		}
-		BUG_ON(mirror_num != bbio->mirror_num);
+		BUG_ON(mirror_num != bioc->mirror_num);
 	}
 
-	sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
+	sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
 	bio->bi_iter.bi_sector = sector;
-	dev = bbio->stripes[bbio->mirror_num - 1].dev;
-	btrfs_put_bbio(bbio);
+	dev = bioc->stripes[bioc->mirror_num - 1].dev;
+	btrfs_put_bioc(bioc);
 	if (!dev || !dev->bdev ||
 	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
 		btrfs_bio_counter_dec(fs_info);
@@ -2618,10 +2636,10 @@ int btrfs_repair_one_sector(struct inode *inode,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-	struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
+	struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
 	struct bio *repair_bio;
-	struct btrfs_io_bio *repair_io_bio;
+	struct btrfs_bio *repair_bbio;
 	blk_status_t status;
 
 	btrfs_debug(fs_info,
@@ -2639,24 +2657,23 @@ int btrfs_repair_one_sector(struct inode *inode,
 		return -EIO;
 	}
 
-	repair_bio = btrfs_io_bio_alloc(1);
-	repair_io_bio = btrfs_io_bio(repair_bio);
+	repair_bio = btrfs_bio_alloc(1);
+	repair_bbio = btrfs_bio(repair_bio);
 	repair_bio->bi_opf = REQ_OP_READ;
 	repair_bio->bi_end_io = failed_bio->bi_end_io;
 	repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
 	repair_bio->bi_private = failed_bio->bi_private;
 
-	if (failed_io_bio->csum) {
+	if (failed_bbio->csum) {
 		const u32 csum_size = fs_info->csum_size;
 
-		repair_io_bio->csum = repair_io_bio->csum_inline;
-		memcpy(repair_io_bio->csum,
-		       failed_io_bio->csum + csum_size * icsum, csum_size);
+		repair_bbio->csum = repair_bbio->csum_inline;
+		memcpy(repair_bbio->csum,
+		       failed_bbio->csum + csum_size * icsum, csum_size);
 	}
 
 	bio_add_page(repair_bio, page, failrec->len, pgoff);
-	repair_io_bio->logical = failrec->start;
-	repair_io_bio->iter = repair_bio->bi_iter;
+	repair_bbio->iter = repair_bio->bi_iter;
 
 	btrfs_debug(btrfs_sb(inode->i_sb),
 		    "repair read error: submitting new read to mirror %d",
@@ -2976,7 +2993,7 @@ static struct extent_buffer *find_extent_buffer_readpage(
 static void end_bio_extent_readpage(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+	struct btrfs_bio *bbio = btrfs_bio(bio);
 	struct extent_io_tree *tree, *failure_tree;
 	struct processed_extent processed = { 0 };
 	/*
@@ -3003,7 +3020,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 		btrfs_debug(fs_info,
 			"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
 			bio->bi_iter.bi_sector, bio->bi_status,
-			io_bio->mirror_num);
+			bbio->mirror_num);
 
 		tree = &BTRFS_I(inode)->io_tree;
 		failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -3028,14 +3045,14 @@ static void end_bio_extent_readpage(struct bio *bio)
 		end = start + bvec->bv_len - 1;
 		len = bvec->bv_len;
 
-		mirror = io_bio->mirror_num;
+		mirror = bbio->mirror_num;
 		if (likely(uptodate)) {
 			if (is_data_inode(inode)) {
-				error_bitmap = btrfs_verify_data_csum(io_bio,
+				error_bitmap = btrfs_verify_data_csum(bbio,
 						bio_offset, page, start, end);
 				ret = error_bitmap;
 			} else {
-				ret = btrfs_validate_metadata_buffer(io_bio,
+				ret = btrfs_validate_metadata_buffer(bbio,
 					page, start, end, mirror);
 			}
 			if (ret)
@@ -3106,7 +3123,7 @@ readpage_ok:
 	}
 	/* Release the last extent */
 	endio_readpage_release_extent(&processed, NULL, 0, 0, false);
-	btrfs_io_bio_free_csum(io_bio);
+	btrfs_bio_free_csum(bbio);
 	bio_put(bio);
 }
 
@@ -3115,53 +3132,43 @@ readpage_ok:
  * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
  * 'bio' because use of __GFP_ZERO is not supported.
  */
-static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
+static inline void btrfs_bio_init(struct btrfs_bio *bbio)
 {
-	memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
+	memset(bbio, 0, offsetof(struct btrfs_bio, bio));
 }
 
 /*
- * The following helpers allocate a bio. As it's backed by a bioset, it'll
- * never fail.  We're returning a bio right now but you can call btrfs_io_bio
- * for the appropriate container_of magic
+ * Allocate a btrfs_io_bio, with @nr_iovecs as maximum number of iovecs.
+ *
+ * The bio allocation is backed by bioset and does not fail.
  */
-struct bio *btrfs_bio_alloc(u64 first_byte)
+struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
 {
 	struct bio *bio;
 
-	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
-	bio->bi_iter.bi_sector = first_byte >> 9;
-	btrfs_io_bio_init(btrfs_io_bio(bio));
+	ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
+	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
+	btrfs_bio_init(btrfs_bio(bio));
 	return bio;
 }
 
 struct bio *btrfs_bio_clone(struct bio *bio)
 {
-	struct btrfs_io_bio *btrfs_bio;
+	struct btrfs_bio *bbio;
 	struct bio *new;
 
 	/* Bio allocation backed by a bioset does not fail */
 	new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
-	btrfs_bio = btrfs_io_bio(new);
-	btrfs_io_bio_init(btrfs_bio);
-	btrfs_bio->iter = bio->bi_iter;
+	bbio = btrfs_bio(new);
+	btrfs_bio_init(bbio);
+	bbio->iter = bio->bi_iter;
 	return new;
 }
 
-struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
-{
-	struct bio *bio;
-
-	/* Bio allocation backed by a bioset does not fail */
-	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
-	btrfs_io_bio_init(btrfs_io_bio(bio));
-	return bio;
-}
-
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 {
 	struct bio *bio;
-	struct btrfs_io_bio *btrfs_bio;
+	struct btrfs_bio *bbio;
 
 	ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
 
@@ -3169,11 +3176,11 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 	bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
 	ASSERT(bio);
 
-	btrfs_bio = btrfs_io_bio(bio);
-	btrfs_io_bio_init(btrfs_bio);
+	bbio = btrfs_bio(bio);
+	btrfs_bio_init(bbio);
 
 	bio_trim(bio, offset >> 9, size >> 9);
-	btrfs_bio->iter = bio->bi_iter;
+	bbio->iter = bio->bi_iter;
 	return bio;
 }
 
@@ -3307,14 +3314,15 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 	struct bio *bio;
 	int ret;
 
+	bio = btrfs_bio_alloc(BIO_MAX_VECS);
 	/*
 	 * For compressed page range, its disk_bytenr is always @disk_bytenr
 	 * passed in, no matter if we have added any range into previous bio.
 	 */
 	if (bio_flags & EXTENT_BIO_COMPRESSED)
-		bio = btrfs_bio_alloc(disk_bytenr);
+		bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
 	else
-		bio = btrfs_bio_alloc(disk_bytenr + offset);
+		bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
 	bio_ctrl->bio = bio;
 	bio_ctrl->bio_flags = bio_flags;
 	bio->bi_end_io = end_io_func;
@@ -3327,7 +3335,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 	if (wbc) {
 		struct block_device *bdev;
 
-		bdev = fs_info->fs_devices->latest_bdev;
+		bdev = fs_info->fs_devices->latest_dev->bdev;
 		bio_set_dev(bio, bdev);
 		wbc_init_bio(wbc, bio);
 	}
@@ -3341,7 +3349,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 			goto error;
 		}
 
-		btrfs_io_bio(bio)->device = device;
+		btrfs_bio(bio)->device = device;
 	}
 	return 0;
 error:
@@ -3599,6 +3607,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		bool force_bio_submit = false;
 		u64 disk_bytenr;
 
+		ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
 		if (cur >= last_byte) {
 			struct extent_state *cached = NULL;
 
@@ -3777,17 +3786,18 @@ static void update_nr_written(struct writeback_control *wbc,
  */
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 		struct page *page, struct writeback_control *wbc,
-		u64 delalloc_start, unsigned long *nr_written)
+		unsigned long *nr_written)
 {
-	u64 page_end = delalloc_start + PAGE_SIZE - 1;
-	bool found;
+	const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
+	u64 delalloc_start = page_offset(page);
 	u64 delalloc_to_write = 0;
-	u64 delalloc_end = 0;
 	int ret;
 	int page_started = 0;
+	while (delalloc_start < page_end) {
+		u64 delalloc_end = page_end;
+		bool found;
 
-	while (delalloc_end < page_end) {
 		found = find_lock_delalloc_range(&inode->vfs_inode, page,
 					       &delalloc_start,
 					       &delalloc_end);
@@ -3854,12 +3864,11 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 				 struct page *page, u64 *start, u64 *end)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	struct btrfs_subpage_info *spi = fs_info->subpage_info;
 	u64 orig_start = *start;
 	/* Declare as unsigned long so we can use bitmap ops */
-	unsigned long dirty_bitmap;
 	unsigned long flags;
-	int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
-	int range_start_bit = nbits;
+	int range_start_bit;
 	int range_end_bit;
 
 	/*
@@ -3872,13 +3881,18 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 		return;
 	}
 
+	range_start_bit = spi->dirty_offset +
+			  (offset_in_page(orig_start) >> fs_info->sectorsize_bits);
+
 	/* We should have the page locked, but just in case */
 	spin_lock_irqsave(&subpage->lock, flags);
-	dirty_bitmap = subpage->dirty_bitmap;
+	bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
+			       spi->dirty_offset + spi->bitmap_nr_bits);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 
-	bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
-			       BTRFS_SUBPAGE_BITMAP_SIZE);
+	range_start_bit -= spi->dirty_offset;
+	range_end_bit -= spi->dirty_offset;
+
 	*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
 	*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
 }
@@ -4054,8 +4068,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			      struct extent_page_data *epd)
 {
 	struct inode *inode = page->mapping->host;
-	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_SIZE - 1;
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	const u64 page_start = page_offset(page);
+	const u64 page_end = page_start + PAGE_SIZE - 1;
 	int ret;
 	int nr = 0;
 	size_t pg_offset;
@@ -4090,8 +4105,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	}
 
 	if (!epd->extent_locked) {
-		ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
-					 &nr_written);
+		ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written);
 		if (ret == 1)
 			return 0;
 		if (ret)
@@ -4141,8 +4155,20 @@ done:
 	 * capable of that.
	 */
 	if (PageError(page))
-		end_extent_writepage(page, ret, start, page_end);
-	unlock_page(page);
+		end_extent_writepage(page, ret, page_start, page_end);
+	if (epd->extent_locked) {
+		/*
+		 * If epd->extent_locked, it's from extent_write_locked_range(),
+		 * the page can either be locked by lock_page() or
+		 * process_one_page().
+		 * Let btrfs_page_unlock_writer() handle both cases.
+		 */
+		ASSERT(wbc);
+		btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
+					 wbc->range_end + 1 - wbc->range_start);
+	} else {
+		unlock_page(page);
+	}
 	ASSERT(ret <= 0);
 	return ret;
 }
@@ -4155,6 +4181,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
+	if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
+		btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
+
 	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 	smp_mb__after_atomic();
 	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4602,12 +4631,11 @@ static int submit_eb_subpage(struct page *page,
 	int submitted = 0;
 	u64 page_start = page_offset(page);
 	int bit_start = 0;
-	const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
 	int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
 	int ret;
 
 	/* Lock and write each dirty extent buffers in the range */
-	while (bit_start < nbits) {
+	while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
 		struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 		struct extent_buffer *eb;
 		unsigned long flags;
@@ -4623,7 +4651,8 @@ static int submit_eb_subpage(struct page *page,
 			break;
 		}
 		spin_lock_irqsave(&subpage->lock, flags);
-		if (!((1 << bit_start) & subpage->dirty_bitmap)) {
+		if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
+			      subpage->bitmaps)) {
 			spin_unlock_irqrestore(&subpage->lock, flags);
 			spin_unlock(&page->mapping->private_lock);
 			bit_start++;
@@ -4756,8 +4785,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		free_extent_buffer(eb);
 		return ret;
 	}
-	if (cache)
+	if (cache) {
+		/* Impiles write in zoned mode */
 		btrfs_put_block_group(cache);
+		/* Mark the last eb in a block group */
+		if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
+			set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+	}
 	ret = write_one_eb(eb, wbc, epd);
 	free_extent_buffer(eb);
 	if (ret < 0)
@@ -4873,7 +4907,7 @@ retry:
 	 *   extent io tree. Thus we don't want to submit such wild eb
 	 *   if the fs already has error.
	 */
-	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+	if (!BTRFS_FS_ERROR(fs_info)) {
 		ret = flush_write_bio(&epd);
 	} else {
 		ret = -EROFS;
@@ -5069,23 +5103,28 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
 	return ret;
 }
 
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			      int mode)
+/*
+ * Submit the pages in the range to bio for call sites which delalloc range has
+ * already been ran (aka, ordered extent inserted) and all pages are still
+ * locked.
+ */
+int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 {
+	bool found_error = false;
+	int first_error = 0;
 	int ret = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long nr_pages = (end - start + PAGE_SIZE) >>
-		PAGE_SHIFT;
-
+	u64 cur = start;
+	unsigned long nr_pages;
+	const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
 	struct extent_page_data epd = {
 		.bio_ctrl = { 0 },
 		.extent_locked = 1,
-		.sync_io = mode == WB_SYNC_ALL,
+		.sync_io = 1,
 	};
 	struct writeback_control wbc_writepages = {
-		.sync_mode	= mode,
-		.nr_to_write	= nr_pages * 2,
+		.sync_mode	= WB_SYNC_ALL,
 		.range_start	= start,
 		.range_end	= end + 1,
 		/* We're called from an async helper function */
@@ -5093,33 +5132,51 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 		.no_cgroup_owner = 1,
 	};
 
+	ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
+	nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
+		   PAGE_SHIFT;
+	wbc_writepages.nr_to_write = nr_pages * 2;
+
 	wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
-	while (start <= end) {
-		page = find_get_page(mapping, start >> PAGE_SHIFT);
-		if (clear_page_dirty_for_io(page))
-			ret = __extent_writepage(page, &wbc_writepages, &epd);
-		else {
-			btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
-					page, start, start + PAGE_SIZE - 1, true);
-			unlock_page(page);
+	while (cur <= end) {
+		u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+
+		page = find_get_page(mapping, cur >> PAGE_SHIFT);
+		/*
+		 * All pages in the range are locked since
+		 * btrfs_run_delalloc_range(), thus there is no way to clear
+		 * the page dirty flag.
+		 */
+		ASSERT(PageLocked(page));
+		ASSERT(PageDirty(page));
+		clear_page_dirty_for_io(page);
+		ret = __extent_writepage(page, &wbc_writepages, &epd);
+		ASSERT(ret <= 0);
+		if (ret < 0) {
+			found_error = true;
+			first_error = ret;
		}
 		put_page(page);
-		start += PAGE_SIZE;
+		cur = cur_end + 1;
 	}
 
-	ASSERT(ret <= 0);
-	if (ret == 0)
+	if (!found_error)
 		ret = flush_write_bio(&epd);
 	else
 		end_write_bio(&epd, ret);
 
 	wbc_detach_inode(&wbc_writepages);
+	if (found_error)
+		return first_error;
 	return ret;
 }
 
 int extent_writepages(struct address_space *mapping,
 		      struct writeback_control *wbc)
 {
+	struct inode *inode = mapping->host;
+	const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root);
+	const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info);
 	int ret = 0;
 	struct extent_page_data epd = {
 		.bio_ctrl = { 0 },
@@ -5127,7 +5184,15 @@ int extent_writepages(struct address_space *mapping,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 
+	/*
+	 * Allow only a single thread to do the reloc work in zoned mode to
+	 * protect the write pointer updates.
+	 */
+	if (data_reloc && zoned)
+		btrfs_inode_lock(inode, 0);
 	ret = extent_write_cache_pages(mapping, wbc, &epd);
+	if (data_reloc && zoned)
+		btrfs_inode_unlock(inode, 0);
 	ASSERT(ret <= 0);
 	if (ret < 0) {
 		end_write_bio(&epd, ret);
@@ -6137,13 +6202,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		 * page, but it may change in the future for 16K page size
 		 * support, so we still preallocate the memory in the loop.
 		 */
-		ret = btrfs_alloc_subpage(fs_info, &prealloc,
-					  BTRFS_SUBPAGE_METADATA);
-		if (ret < 0) {
-			unlock_page(p);
-			put_page(p);
-			exists = ERR_PTR(ret);
-			goto free_eb;
+		if (fs_info->sectorsize < PAGE_SIZE) {
+			prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
+			if (IS_ERR(prealloc)) {
+				ret = PTR_ERR(prealloc);
+				unlock_page(p);
+				put_page(p);
+				exists = ERR_PTR(ret);
+				goto free_eb;
+			}
 		}
 
 		spin_lock(&mapping->private_lock);
@@ -7167,32 +7234,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 	}
 }
 
+#define GANG_LOOKUP_SIZE	16
 static struct extent_buffer *get_next_extent_buffer(
 		struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
 {
-	struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
+	struct extent_buffer *gang[GANG_LOOKUP_SIZE];
 	struct extent_buffer *found = NULL;
 	u64 page_start = page_offset(page);
-	int ret;
-	int i;
+	u64 cur = page_start;
 
 	ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
-	ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
 	lockdep_assert_held(&fs_info->buffer_lock);
 
-	ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
-			bytenr >> fs_info->sectorsize_bits,
-			PAGE_SIZE / fs_info->nodesize);
-	for (i = 0; i < ret; i++) {
-		/* Already beyond page end */
-		if (gang[i]->start >= page_start + PAGE_SIZE)
-			break;
-		/* Found one */
-		if (gang[i]->start >= bytenr) {
-			found = gang[i];
-			break;
+	while (cur < page_start + PAGE_SIZE) {
+		int ret;
+		int i;
+
+		ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
+				(void **)gang, cur >> fs_info->sectorsize_bits,
+				min_t(unsigned int, GANG_LOOKUP_SIZE,
+				      PAGE_SIZE / fs_info->nodesize));
+		if (ret == 0)
+			goto out;
+		for (i = 0; i < ret; i++) {
+			/* Already beyond page end */
+			if (gang[i]->start >= page_start + PAGE_SIZE)
+				goto out;
+			/* Found one */
+			if (gang[i]->start >= bytenr) {
+				found = gang[i];
+				goto out;
+			}
		}
+		cur = gang[ret - 1]->start + gang[ret - 1]->len;
 	}
+out:
 	return found;
 }