Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--	fs/btrfs/inode.c	537
1 file changed, 199 insertions, 338 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f0c97d25b4a0..45ebef8d3ea8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -977,7 +977,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
 		if (!(start >= locked_page_end || end <= locked_page_start))
 			locked_page = async_chunk->locked_page;
 	}
-	lock_extent(io_tree, start, end);
+	lock_extent(io_tree, start, end, NULL);

 	/* We have fall back to uncompressed write */
 	if (!async_extent->pages)
@@ -1024,7 +1024,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
 				       1 << BTRFS_ORDERED_COMPRESSED,
 				       async_extent->compress_type);
 	if (ret) {
-		btrfs_drop_extent_cache(inode, start, end, 0);
+		btrfs_drop_extent_map_range(inode, start, end, false);
 		goto out_free_reserve;
 	}
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1254,7 +1254,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 	}

 	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
-	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);

 	/*
 	 * Relocation relies on the relocated extents to have exactly the same
@@ -1319,8 +1318,9 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 			 * skip current ordered extent.
 			 */
 			if (ret)
-				btrfs_drop_extent_cache(inode, start,
-						start + ram_size - 1, 0);
+				btrfs_drop_extent_map_range(inode, start,
+							    start + ram_size - 1,
+							    false);
 		}

 		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1360,7 +1360,7 @@ out:
 	return ret;

 out_drop_extent_cache:
-	btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
+	btrfs_drop_extent_map_range(inode, start, start + ram_size - 1, false);
 out_reserve:
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
@@ -1524,7 +1524,7 @@ static int cow_file_range_async(struct btrfs_inode *inode,
 	unsigned nofs_flag;
 	const blk_opf_t write_flags = wbc_to_write_flags(wbc);

-	unlock_extent(&inode->io_tree, start, end);
+	unlock_extent(&inode->io_tree, start, end, NULL);

 	if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
 	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
@@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 			done_offset = end;

 		if (done_offset == start) {
-			struct btrfs_fs_info *info = inode->root->fs_info;
-
-			wait_var_event(&info->zone_finish_wait,
-				       !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
+			wait_on_bit_io(&inode->root->fs_info->flags,
+				       BTRFS_FS_NEED_ZONE_FINISH,
+				       TASK_UNINTERRUPTIBLE);
 			continue;
 		}

@@ -1667,7 +1666,7 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 }

 static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
-					u64 bytenr, u64 num_bytes)
+					u64 bytenr, u64 num_bytes, bool nowait)
 {
 	struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bytenr);
 	struct btrfs_ordered_sum *sums;
@@ -1675,7 +1674,8 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
 	LIST_HEAD(list);

 	ret = btrfs_lookup_csums_range(csum_root, bytenr,
-				       bytenr + num_bytes - 1, &list, 0);
+				       bytenr + num_bytes - 1, &list, 0,
+				       nowait);
 	if (ret == 0 && list_empty(&list))
 		return 0;

@@ -1748,7 +1748,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
 		if (count > 0)
 			clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
-					 0, 0, NULL);
+					 NULL);
 	}

 	return cow_file_range(inode, locked_page, start, end, page_started,
@@ -1801,6 +1801,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
 	u8 extent_type;
 	int can_nocow = 0;
 	int ret = 0;
+	bool nowait = path->nowait;

 	fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
 	extent_type = btrfs_file_extent_type(leaf, fi);
@@ -1877,7 +1878,8 @@ static int can_nocow_file_extent(struct btrfs_path *path,
 	 * Force COW if csums exist in the range. This ensures that csums for a
 	 * given extent are either valid or do not exist.
 	 */
-	ret = csum_exist_in_range(root->fs_info, args->disk_bytenr, args->num_bytes);
+	ret = csum_exist_in_range(root->fs_info, args->disk_bytenr, args->num_bytes,
+				  nowait);
 	WARN_ON_ONCE(ret > 0 && is_freespace_inode);
 	if (ret != 0)
 		goto out;
@@ -2100,8 +2102,8 @@ out_check:
 					1 << BTRFS_ORDERED_PREALLOC,
 					BTRFS_COMPRESS_NONE);
 			if (ret) {
-				btrfs_drop_extent_cache(inode, cur_offset,
-							nocow_end, 0);
+				btrfs_drop_extent_map_range(inode, cur_offset,
+							    nocow_end, false);
 				goto error;
 			}
 		} else {
@@ -2549,7 +2551,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,

 	ASSERT(pre + post < len);

-	lock_extent(&inode->io_tree, start, start + len - 1);
+	lock_extent(&inode->io_tree, start, start + len - 1, NULL);
 	write_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, start, len);
 	if (!em) {
@@ -2623,7 +2625,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,

 out_unlock:
 	write_unlock(&em_tree->lock);
-	unlock_extent(&inode->io_tree, start, start + len - 1);
+	unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
 out:
 	free_extent_map(split_pre);
 	free_extent_map(split_mid);
@@ -2701,8 +2703,10 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
 		ret = extract_ordered_extent(bi, bio,
 				page_offset(bio_first_bvec_all(bio)->bv_page));
-		if (ret)
-			goto out;
+		if (ret) {
+			btrfs_bio_end_io(btrfs_bio(bio), ret);
+			return;
+		}
 	}

 	/*
@@ -2722,16 +2726,12 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro
 			return;

 		ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
-		if (ret)
-			goto out;
+		if (ret) {
+			btrfs_bio_end_io(btrfs_bio(bio), ret);
+			return;
+		}
 	}

 	btrfs_submit_bio(fs_info, bio, mirror_num);
-	return;
-out:
-	if (ret) {
-		bio->bi_status = ret;
-		bio_endio(bio);
-	}
 }

 void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
@@ -2758,8 +2758,7 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 	 */
 	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
 	if (ret) {
-		bio->bi_status = ret;
-		bio_endio(bio);
+		btrfs_bio_end_io(btrfs_bio(bio), ret);
 		return;
 	}

@@ -2819,8 +2818,8 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,

 		ret = set_extent_bit(&inode->io_tree, search_start,
 				     search_start + em_len - 1,
-				     EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
-				     GFP_NOFS, NULL);
+				     EXTENT_DELALLOC_NEW, cached_state,
+				     GFP_NOFS);
 next:
 		search_start = extent_map_end(em);
 		free_extent_map(em);
@@ -2932,7 +2931,7 @@ again:
 	if (ret)
 		goto out_page;

-	lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
+	lock_extent(&inode->io_tree, page_start, page_end, &cached_state);

 	/* already ordered? We're done */
 	if (PageOrdered(page))
@@ -2940,8 +2939,8 @@ again:

 	ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
 	if (ordered) {
-		unlock_extent_cached(&inode->io_tree, page_start, page_end,
-				     &cached_state);
+		unlock_extent(&inode->io_tree, page_start, page_end,
+			      &cached_state);
 		unlock_page(page);
 		btrfs_start_ordered_extent(ordered, 1);
 		btrfs_put_ordered_extent(ordered);
@@ -2967,8 +2966,7 @@ out_reserved:
 	if (free_delalloc_space)
 		btrfs_delalloc_release_space(inode, data_reserved, page_start,
 					     PAGE_SIZE, true);
-	unlock_extent_cached(&inode->io_tree, page_start, page_end,
-			     &cached_state);
+	unlock_extent(&inode->io_tree, page_start, page_end, &cached_state);
 out_page:
 	if (ret) {
 		/*
@@ -3226,6 +3224,8 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		clear_bits |= EXTENT_DELALLOC_NEW;

 	freespace_inode = btrfs_is_free_space_inode(inode);
+	if (!freespace_inode)
+		btrfs_lockdep_acquire(fs_info, btrfs_ordered_extent);

 	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
 		ret = -EIO;
@@ -3270,7 +3270,7 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	}

 	clear_bits |= EXTENT_LOCKED;
-	lock_extent_bits(io_tree, start, end, &cached_state);
+	lock_extent(io_tree, start, end, &cached_state);

 	if (freespace_inode)
 		trans = btrfs_join_transaction_spacecache(root);
@@ -3326,7 +3326,7 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	    !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
 		clear_extent_bit(&inode->io_tree, start, end,
 				 EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
-				 0, 0, &cached_state);
+				 &cached_state);

 	btrfs_inode_safe_disk_i_size_write(inode, 0);
 	ret = btrfs_update_inode_fallback(trans, root, inode);
@@ -3337,7 +3337,6 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	ret = 0;
 out:
 	clear_extent_bit(&inode->io_tree, start, end, clear_bits,
-			 (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
 			 &cached_state);

 	if (trans)
@@ -3362,8 +3361,8 @@ out:
 			unwritten_start += logical_len;
 		clear_extent_uptodate(io_tree, unwritten_start, end, NULL);

-		/* Drop the cache for the part of the extent we didn't write. */
-		btrfs_drop_extent_cache(inode, unwritten_start, end, 0);
+		/* Drop extent maps for the part of the extent we didn't write. */
+		btrfs_drop_extent_map_range(inode, unwritten_start, end, false);

 		/*
 		 * If the ordered extent had an IOERR or something else went
@@ -3440,6 +3439,13 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
 	return 0;
 }

+static u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums, u64 offset)
+{
+	u64 offset_in_sectors = offset >> fs_info->sectorsize_bits;
+
+	return csums + offset_in_sectors * fs_info->csum_size;
+}
+
 /*
  * check_data_csum - verify checksum of one sector of uncompressed data
  * @inode:	inode
@@ -4879,9 +4885,9 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 	block_end = block_start + blocksize - 1;

 	ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
-					  blocksize);
+					  blocksize, false);
 	if (ret < 0) {
-		if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
+		if (btrfs_check_nocow_lock(inode, block_start, &write_bytes, false) > 0) {
 			/* For nocow case, no need to reserve data space */
 			only_release_metadata = true;
 		} else {
@@ -4923,12 +4929,11 @@ again:
 	}
 	wait_on_page_writeback(page);

-	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
+	lock_extent(io_tree, block_start, block_end, &cached_state);

 	ordered = btrfs_lookup_ordered_extent(inode, block_start);
 	if (ordered) {
-		unlock_extent_cached(io_tree, block_start, block_end,
-				     &cached_state);
+		unlock_extent(io_tree, block_start, block_end, &cached_state);
 		unlock_page(page);
 		put_page(page);
 		btrfs_start_ordered_extent(ordered, 1);
@@ -4938,13 +4943,12 @@ again:

 	clear_extent_bit(&inode->io_tree, block_start, block_end,
 			 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
-			 0, 0, &cached_state);
+			 &cached_state);

 	ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
 					&cached_state);
 	if (ret) {
-		unlock_extent_cached(io_tree, block_start, block_end,
-				     &cached_state);
+		unlock_extent(io_tree, block_start, block_end, &cached_state);
 		goto out_unlock;
 	}

@@ -4961,11 +4965,11 @@ again:
 	btrfs_page_clear_checked(fs_info, page, block_start,
 				 block_end + 1 - block_start);
 	btrfs_page_set_dirty(fs_info, page, block_start, block_end + 1 - block_start);
-	unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
+	unlock_extent(io_tree, block_start, block_end, &cached_state);

 	if (only_release_metadata)
 		set_extent_bit(&inode->io_tree, block_start, block_end,
-			       EXTENT_NORESERVE, 0, NULL, NULL, GFP_NOFS, NULL);
+			       EXTENT_NORESERVE, NULL, GFP_NOFS);

 out_unlock:
 	if (ret) {
@@ -5022,8 +5026,7 @@ static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
 		return ret;
 	}

-	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
-			offset, 0, 0, len, 0, len, 0, 0, 0);
+	ret = btrfs_insert_hole_extent(trans, root, btrfs_ino(inode), offset, len);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 	} else {
@@ -5047,7 +5050,6 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
 	struct extent_io_tree *io_tree = &inode->io_tree;
 	struct extent_map *em = NULL;
 	struct extent_state *cached_state = NULL;
-	struct extent_map_tree *em_tree = &inode->extent_tree;
 	u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
 	u64 block_end = ALIGN(size, fs_info->sectorsize);
 	u64 last_byte;
@@ -5095,10 +5097,11 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
 			if (err)
 				break;

-			btrfs_drop_extent_cache(inode, cur_offset,
-						cur_offset + hole_size - 1, 0);
 			hole_em = alloc_extent_map();
 			if (!hole_em) {
+				btrfs_drop_extent_map_range(inode, cur_offset,
+						    cur_offset + hole_size - 1,
+						    false);
 				btrfs_set_inode_full_sync(inode);
 				goto next;
 			}
@@ -5113,16 +5116,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
 			hole_em->compress_type = BTRFS_COMPRESS_NONE;
 			hole_em->generation = fs_info->generation;

-			while (1) {
-				write_lock(&em_tree->lock);
-				err = add_extent_mapping(em_tree, hole_em, 1);
-				write_unlock(&em_tree->lock);
-				if (err != -EEXIST)
-					break;
-				btrfs_drop_extent_cache(inode, cur_offset,
-							cur_offset +
-							hole_size - 1, 0);
-			}
+			err = btrfs_replace_extent_map_range(inode, hole_em, true);
 			free_extent_map(hole_em);
 		} else {
 			err = btrfs_inode_set_file_extent_range(inode,
@@ -5138,7 +5132,7 @@ next:
 			break;
 	}
 	free_extent_map(em);
-	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
+	unlock_extent(io_tree, hole_start, block_end - 1, &cached_state);
 	return err;
 }

@@ -5272,7 +5266,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
  * While truncating the inode pages during eviction, we get the VFS
  * calling btrfs_invalidate_folio() against each folio of the inode. This
  * is slow because the calls to btrfs_invalidate_folio() result in a
- * huge amount of calls to lock_extent_bits() and clear_extent_bit(),
+ * huge amount of calls to lock_extent() and clear_extent_bit(),
  * which keep merging and splitting extent_state structures over and over,
  * wasting lots of time.
 *
@@ -5284,29 +5278,12 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
 static void evict_inode_truncate_pages(struct inode *inode)
 {
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
 	struct rb_node *node;

 	ASSERT(inode->i_state & I_FREEING);
 	truncate_inode_pages_final(&inode->i_data);

-	write_lock(&map_tree->lock);
-	while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
-		struct extent_map *em;
-
-		node = rb_first_cached(&map_tree->map);
-		em = rb_entry(node, struct extent_map, rb_node);
-		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
-		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
-		remove_extent_mapping(map_tree, em);
-		free_extent_map(em);
-		if (need_resched()) {
-			write_unlock(&map_tree->lock);
-			cond_resched();
-			write_lock(&map_tree->lock);
-		}
-	}
-	write_unlock(&map_tree->lock);
+	btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false);

 	/*
 	 * Keep looping until we have no more ranges in the io tree.
@@ -5339,7 +5316,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
 		state_flags = state->state;
 		spin_unlock(&io_tree->lock);

-		lock_extent_bits(io_tree, start, end, &cached_state);
+		lock_extent(io_tree, start, end, &cached_state);

 		/*
 		 * If still has DELALLOC flag, the extent didn't reach disk,
@@ -5354,8 +5331,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
 					       end - start + 1);

 		clear_extent_bit(io_tree, start, end,
-				 EXTENT_LOCKED | EXTENT_DELALLOC |
-				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
+				 EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING,
 				 &cached_state);

 		cond_resched();
@@ -5708,6 +5684,11 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
 	BTRFS_I(inode)->location.offset = 0;
 	BTRFS_I(inode)->root = btrfs_grab_root(args->root);
 	BUG_ON(args->root && !BTRFS_I(inode)->root);
+
+	if (args->root && args->root == args->root->fs_info->tree_root &&
+	    args->ino != BTRFS_BTREE_INODE_OBJECTID)
+		set_bit(BTRFS_INODE_FREE_SPACE_INODE,
+			&BTRFS_I(inode)->runtime_flags);
 	return 0;
 }

@@ -6868,7 +6849,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 	struct btrfs_key found_key;
 	struct extent_map *em = NULL;
 	struct extent_map_tree *em_tree = &inode->extent_tree;
-	struct extent_io_tree *io_tree = &inode->io_tree;

 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, start, len);
@@ -7031,8 +7011,6 @@ next:
 			}
 			flush_dcache_page(page);
 		}
-		set_extent_uptodate(io_tree, em->start,
-				    extent_map_end(em) - 1, NULL, GFP_NOFS);
 		goto insert;
 	}
 not_found:
@@ -7066,133 +7044,6 @@ out:
 	return em;
 }

-struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
-					   u64 start, u64 len)
-{
-	struct extent_map *em;
-	struct extent_map *hole_em = NULL;
-	u64 delalloc_start = start;
-	u64 end;
-	u64 delalloc_len;
-	u64 delalloc_end;
-	int err = 0;
-
-	em = btrfs_get_extent(inode, NULL, 0, start, len);
-	if (IS_ERR(em))
-		return em;
-	/*
-	 * If our em maps to:
-	 * - a hole or
-	 * - a pre-alloc extent,
-	 * there might actually be delalloc bytes behind it.
-	 */
-	if (em->block_start != EXTENT_MAP_HOLE &&
-	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-		return em;
-	else
-		hole_em = em;
-
-	/* check to see if we've wrapped (len == -1 or similar) */
-	end = start + len;
-	if (end < start)
-		end = (u64)-1;
-	else
-		end -= 1;
-
-	em = NULL;
-
-	/* ok, we didn't find anything, lets look for delalloc */
-	delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
-				 end, len, EXTENT_DELALLOC, 1);
-	delalloc_end = delalloc_start + delalloc_len;
-	if (delalloc_end < delalloc_start)
-		delalloc_end = (u64)-1;
-
-	/*
-	 * We didn't find anything useful, return the original results from
-	 * get_extent()
-	 */
-	if (delalloc_start > end || delalloc_end <= start) {
-		em = hole_em;
-		hole_em = NULL;
-		goto out;
-	}
-
-	/*
-	 * Adjust the delalloc_start to make sure it doesn't go backwards from
-	 * the start they passed in
-	 */
-	delalloc_start = max(start, delalloc_start);
-	delalloc_len = delalloc_end - delalloc_start;
-
-	if (delalloc_len > 0) {
-		u64 hole_start;
-		u64 hole_len;
-		const u64 hole_end = extent_map_end(hole_em);
-
-		em = alloc_extent_map();
-		if (!em) {
-			err = -ENOMEM;
-			goto out;
-		}
-
-		ASSERT(hole_em);
-		/*
-		 * When btrfs_get_extent can't find anything it returns one
-		 * huge hole
-		 *
-		 * Make sure what it found really fits our range, and adjust to
-		 * make sure it is based on the start from the caller
-		 */
-		if (hole_end <= start || hole_em->start > end) {
-		       free_extent_map(hole_em);
-		       hole_em = NULL;
-		} else {
-		       hole_start = max(hole_em->start, start);
-		       hole_len = hole_end - hole_start;
-		}
-
-		if (hole_em && delalloc_start > hole_start) {
-			/*
-			 * Our hole starts before our delalloc, so we have to
-			 * return just the parts of the hole that go until the
-			 * delalloc starts
-			 */
-			em->len = min(hole_len, delalloc_start - hole_start);
-			em->start = hole_start;
-			em->orig_start = hole_start;
-			/*
-			 * Don't adjust block start at all, it is fixed at
-			 * EXTENT_MAP_HOLE
-			 */
-			em->block_start = hole_em->block_start;
-			em->block_len = hole_len;
-			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
-				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
-		} else {
-			/*
-			 * Hole is out of passed range or it starts after
-			 * delalloc range
-			 */
-			em->start = delalloc_start;
-			em->len = delalloc_len;
-			em->orig_start = delalloc_start;
-			em->block_start = EXTENT_MAP_DELALLOC;
-			em->block_len = delalloc_len;
-		}
-	} else {
-		return hole_em;
-	}
-out:
-
-	free_extent_map(hole_em);
-	if (err) {
-		free_extent_map(em);
-		return ERR_PTR(err);
-	}
-	return em;
-}
-
 static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
 						  const u64 start,
 						  const u64 len,
@@ -7222,7 +7073,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
 	if (ret) {
 		if (em) {
 			free_extent_map(em);
-			btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+			btrfs_drop_extent_map_range(inode, start,
+						    start + len - 1, false);
 		}
 		em = ERR_PTR(ret);
 	}
@@ -7293,7 +7145,7 @@ static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
  */
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
 			      u64 *orig_start, u64 *orig_block_len,
-			      u64 *ram_bytes, bool strict)
+			      u64 *ram_bytes, bool nowait, bool strict)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct can_nocow_file_extent_args nocow_args = { 0 };
@@ -7309,6 +7161,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->nowait = nowait;

 	ret = btrfs_lookup_file_extent(NULL, root, path,
 			btrfs_ino(BTRFS_I(inode)), offset, 0);
@@ -7405,7 +7258,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			if (!try_lock_extent(io_tree, lockstart, lockend))
 				return -EAGAIN;
 		} else {
-			lock_extent_bits(io_tree, lockstart, lockend, cached_state);
+			lock_extent(io_tree, lockstart, lockend, cached_state);
 		}
 		/*
 		 * We're concerned with the entire range that we're going to be
@@ -7427,7 +7280,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 							 lockstart, lockend)))
 			break;

-		unlock_extent_cached(io_tree, lockstart, lockend, cached_state);
+		unlock_extent(io_tree, lockstart, lockend, cached_state);

 		if (ordered) {
 			if (nowait) {
@@ -7489,7 +7342,6 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 				       u64 ram_bytes, int compress_type,
 				       int type)
 {
-	struct extent_map_tree *em_tree;
 	struct extent_map *em;
 	int ret;

@@ -7498,7 +7350,6 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 	       type == BTRFS_ORDERED_NOCOW ||
 	       type == BTRFS_ORDERED_REGULAR);

-	em_tree = &inode->extent_tree;
 	em = alloc_extent_map();
 	if (!em)
 		return ERR_PTR(-ENOMEM);
@@ -7519,18 +7370,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 		em->compress_type = compress_type;
 	}

-	do {
-		btrfs_drop_extent_cache(inode, em->start,
-					em->start + em->len - 1, 0);
-		write_lock(&em_tree->lock);
-		ret = add_extent_mapping(em_tree, em, 1);
-		write_unlock(&em_tree->lock);
-		/*
-		 * The caller has taken lock_extent(), who could race with us
-		 * to add em?
-		 */
-	} while (ret == -EEXIST);
-
+	ret = btrfs_replace_extent_map_range(inode, em, true);
 	if (ret) {
 		free_extent_map(em);
 		return ERR_PTR(ret);
@@ -7578,7 +7418,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
 		block_start = em->block_start + (start - em->start);

 		if (can_nocow_extent(inode, start, &len, &orig_start,
-				     &orig_block_len, &ram_bytes, false) == 1) {
+				     &orig_block_len, &ram_bytes, false, false) == 1) {
 			bg = btrfs_inc_nocow_writers(fs_info, block_start);
 			if (bg)
 				can_nocow = true;
@@ -7694,6 +7534,20 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 	bool unlock_extents = false;

 	/*
+	 * We could potentially fault if we have a buffer > PAGE_SIZE, and if
+	 * we're NOWAIT we may submit a bio for a partial range and return
+	 * EIOCBQUEUED, which would result in an errant short read.
+	 *
+	 * The best way to handle this would be to allow for partial completions
+	 * of iocb's, so we could submit the partial bio, return and fault in
+	 * the rest of the pages, and then submit the io for the rest of the
+	 * range.  However we don't have that currently, so simply return
+	 * -EAGAIN at this point so that the normal path is used.
+	 */
+	if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE)
+		return -EAGAIN;
+
+	/*
 	 * Cap the size of reads to that usually seen in buffered I/O as we need
 	 * to allocate a contiguous array for the checksums.
 	 */
@@ -7749,7 +7603,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 	if (write && !(flags & IOMAP_NOWAIT)) {
 		ret = btrfs_check_data_free_space(BTRFS_I(inode),
 						  &dio_data->data_reserved,
-						  start, data_alloc_len);
+						  start, data_alloc_len, false);
 		if (!ret)
 			dio_data->data_space_reserved = true;
 		else if (ret && !(BTRFS_I(inode)->flags &
@@ -7871,8 +7725,8 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 	}

 	if (unlock_extents)
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-				     lockstart, lockend, &cached_state);
+		unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			      &cached_state);
 	else
 		free_extent_state(cached_state);

@@ -7901,8 +7755,8 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 	return 0;

 unlock_err:
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-			     &cached_state);
+	unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+		      &cached_state);
 err:
 	if (dio_data->data_space_reserved) {
 		btrfs_free_reserved_data_space(BTRFS_I(inode),
@@ -7925,7 +7779,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,

 	if (!write && (iomap->type == IOMAP_HOLE)) {
 		/* If reading from a hole, unlock and return */
-		unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1);
+		unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1,
+			      NULL);
 		return 0;
 	}

@@ -7937,7 +7792,7 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 						       pos, length, false);
 		else
 			unlock_extent(&BTRFS_I(inode)->io_tree, pos,
-				      pos + length - 1);
+				      pos + length - 1, NULL);
 		ret = -ENOTBLK;
 	}

@@ -7962,7 +7817,7 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
 	} else {
 		unlock_extent(&BTRFS_I(dip->inode)->io_tree,
 			      dip->file_offset,
-			      dip->file_offset + dip->bytes - 1);
+			      dip->file_offset + dip->bytes - 1, NULL);
 	}

 	kfree(dip->csums);
@@ -7973,7 +7828,7 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 				  int mirror_num,
 				  enum btrfs_compression_type compress_type)
 {
-	struct btrfs_dio_private *dip = bio->bi_private;
+	struct btrfs_dio_private *dip = btrfs_bio(bio)->private;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

 	BUG_ON(bio_op(bio) == REQ_OP_WRITE);
@@ -7988,8 +7843,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 {
 	struct inode *inode = dip->inode;
 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
-	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
 	blk_status_t err = BLK_STS_OK;
 	struct bvec_iter iter;
@@ -8002,9 +7855,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 		if (uptodate &&
 		    (!csum || !btrfs_check_data_csum(inode, bbio, offset, bv.bv_page,
 					       bv.bv_offset))) {
-			clean_io_failure(fs_info, failure_tree, io_tree, start,
-					 bv.bv_page, btrfs_ino(BTRFS_I(inode)),
-					 bv.bv_offset);
+			btrfs_clean_io_failure(BTRFS_I(inode), start,
					       bv.bv_page, bv.bv_offset);
 		} else {
 			int ret;

@@ -8026,10 +7878,10 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
 	return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, false);
 }

-static void btrfs_end_dio_bio(struct bio *bio)
+static void btrfs_end_dio_bio(struct btrfs_bio *bbio)
 {
-	struct btrfs_dio_private *dip = bio->bi_private;
-	struct btrfs_bio *bbio = btrfs_bio(bio);
+	struct btrfs_dio_private *dip = bbio->private;
+	struct bio *bio = &bbio->bio;
 	blk_status_t err = bio->bi_status;

 	if (err)
@@ -8055,7 +7907,7 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 				 u64 file_offset, int async_submit)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_dio_private *dip = bio->bi_private;
+	struct btrfs_dio_private *dip = btrfs_bio(bio)->private;
 	blk_status_t ret;

 	/* Save the original iter for read repair */
@@ -8078,8 +7930,7 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 		 */
 		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
 		if (ret) {
-			bio->bi_status = ret;
-			bio_endio(bio);
+			btrfs_bio_end_io(btrfs_bio(bio), ret);
 			return;
 		}
 	} else {
@@ -8162,9 +8013,8 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 		 * This will never fail as it's passing GPF_NOFS and
 		 * the allocation is backed by btrfs_bioset.
 		 */
-		bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
-		bio->bi_private = dip;
-		bio->bi_end_io = btrfs_end_dio_bio;
+		bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len,
+					      btrfs_end_dio_bio, dip);
 		btrfs_bio(bio)->file_offset = file_offset;

 		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
@@ -8246,6 +8096,25 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	if (ret)
 		return ret;

+	/*
+	 * fiemap_prep() called filemap_write_and_wait() for the whole possible
+	 * file range (0 to LLONG_MAX), but that is not enough if we have
+	 * compression enabled. The first filemap_fdatawrite_range() only kicks
+	 * in the compression of data (in an async thread) and will return
+	 * before the compression is done and writeback is started. A second
+	 * filemap_fdatawrite_range() is needed to wait for the compression to
+	 * complete and writeback to start. We also need to wait for ordered
+	 * extents to complete, because our fiemap implementation uses mainly
+	 * file extent items to list the extents, searching for extent maps
+	 * only for file ranges with holes or prealloc extents to figure out
+	 * if we have delalloc in those ranges.
+	 */
+	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
+		ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
+		if (ret)
+			return ret;
+	}
+
 	return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
 }

@@ -8378,14 +8247,14 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
 	}

 	if (!inode_evicting)
-		lock_extent_bits(tree, page_start, page_end, &cached_state);
+		lock_extent(tree, page_start, page_end, &cached_state);

 	cur = page_start;
 	while (cur < page_end) {
 		struct btrfs_ordered_extent *ordered;
-		bool delete_states;
 		u64 range_end;
 		u32 range_len;
+		u32 extra_flags = 0;

 		ordered = btrfs_lookup_first_ordered_range(inode, cur,
 							   page_end + 1 - cur);
@@ -8395,7 +8264,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
 			 * No ordered extent covering this range, we are safe
 			 * to delete all extent states in the range.
 			 */
-			delete_states = true;
+			extra_flags = EXTENT_CLEAR_ALL_BITS;
 			goto next;
 		}
 		if (ordered->file_offset > cur) {
@@ -8406,7 +8275,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
 			 * the ordered extent in the next iteration.
 			 */
 			range_end = ordered->file_offset - 1;
-			delete_states = true;
+			extra_flags = EXTENT_CLEAR_ALL_BITS;
 			goto next;
 		}

@@ -8421,7 +8290,6 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
 			 * We can't delete the extent states as
 			 * btrfs_finish_ordered_io() may still use some of them.
 			 */
-			delete_states = false;
 			goto next;
 		}
 		btrfs_page_clear_ordered(fs_info, &folio->page, cur, range_len);
@@ -8438,7 +8306,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
 			clear_extent_bit(tree, cur, range_end,
 					 EXTENT_DELALLOC |
 					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
-					 EXTENT_DEFRAG, 1, 0, &cached_state);
+					 EXTENT_DEFRAG, &cached_state);

 		spin_lock_irq(&inode->ordered_tree.lock);
 		set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
@@ -8446,6 +8314,12 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
 					     cur - ordered->file_offset);
 		spin_unlock_irq(&inode->ordered_tree.lock);

+		/*
+		 * If the ordered extent has finished, we're safe to delete all
+		 * the extent states of the range, otherwise
+		 * btrfs_finish_ordered_io() will get executed by endio for
+		 * other pages, so we can't delete extent states.
+		 */
 		if (btrfs_dec_test_ordered_pending(inode, &ordered,
 						   cur, range_end + 1 - cur)) {
 			btrfs_finish_ordered_io(ordered);
@@ -8453,14 +8327,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
 			 * The ordered extent has finished, now we're again
 			 * safe to delete all extent states of the range.
 			 */
-			delete_states = true;
-		} else {
-			/*
-			 * btrfs_finish_ordered_io() will get executed by endio
-			 * of other pages, thus we can't delete extent states
-			 * anymore
-			 */
-			delete_states = false;
+			extra_flags = EXTENT_CLEAR_ALL_BITS;
 		}
 next:
 		if (ordered)
@@ -8484,8 +8351,8 @@ next:
 		if (!inode_evicting) {
 			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
 				 EXTENT_DELALLOC | EXTENT_UPTODATE |
-				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
-				 delete_states, &cached_state);
+				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG |
+				 extra_flags, &cached_state);
 		}
 		cur = range_end + 1;
 	}
@@ -8576,11 +8443,11 @@ again:
 	}
 	wait_on_page_writeback(page);

-	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
+	lock_extent(io_tree, page_start, page_end, &cached_state);
 	ret2 = set_page_extent_mapped(page);
 	if (ret2 < 0) {
 		ret = vmf_error(ret2);
-		unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
+		unlock_extent(io_tree, page_start, page_end, &cached_state);
 		goto out_unlock;
 	}

@@ -8591,8 +8458,7 @@ again:
 	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
 			PAGE_SIZE);
 	if (ordered) {
-		unlock_extent_cached(io_tree, page_start, page_end,
-				     &cached_state);
+		unlock_extent(io_tree, page_start, page_end, &cached_state);
 		unlock_page(page);
 		up_read(&BTRFS_I(inode)->i_mmap_lock);
 		btrfs_start_ordered_extent(ordered, 1);
@@ -8620,13 +8486,12 @@ again:
 	 */
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
 			  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
-			  EXTENT_DEFRAG, 0, 0, &cached_state);
+			  EXTENT_DEFRAG, &cached_state);

 	ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
 					&cached_state);
 	if (ret2) {
-		unlock_extent_cached(io_tree, page_start, page_end,
-				     &cached_state);
+		unlock_extent(io_tree, page_start, page_end, &cached_state);
 		ret = VM_FAULT_SIGBUS;
 		goto out_unlock;
 	}
@@ -8646,7 +8511,7 @@ again:

 	btrfs_set_inode_last_sub_trans(BTRFS_I(inode));

-	unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
+	unlock_extent(io_tree, page_start, page_end, &cached_state);
 	up_read(&BTRFS_I(inode)->i_mmap_lock);

 	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
@@ -8747,24 +8612,24 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
 		const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);

 		control.new_size = new_size;
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
+		lock_extent(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
 				 &cached_state);
 		/*
 		 * We want to drop from the next block forward in case this new
 		 * size is not block aligned since we will be keeping the last
 		 * block of the extent just the way it is.
 		 */
-		btrfs_drop_extent_cache(BTRFS_I(inode),
-					ALIGN(new_size, fs_info->sectorsize),
-					(u64)-1, 0);
+		btrfs_drop_extent_map_range(BTRFS_I(inode),
+					    ALIGN(new_size, fs_info->sectorsize),
+					    (u64)-1, false);

 		ret = btrfs_truncate_inode_items(trans, root, &control);

 		inode_sub_bytes(inode, control.sub_bytes);
 		btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), control.last_size);

-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
-				     (u64)-1, &cached_state);
+		unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
+			      &cached_state);

 		trans->block_rsv = &fs_info->trans_block_rsv;
 		if (ret != -ENOSPC && ret != -EAGAIN)
@@ -8895,6 +8760,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->last_log_commit = 0;

 	spin_lock_init(&ei->lock);
+	spin_lock_init(&ei->io_failure_lock);
 	ei->outstanding_extents = 0;
 	if (sb->s_magic != BTRFS_TEST_MAGIC)
 		btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
@@ -8911,12 +8777,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	inode = &ei->vfs_inode;
 	extent_map_tree_init(&ei->extent_tree);
 	extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
-	extent_io_tree_init(fs_info, &ei->io_failure_tree,
-			    IO_TREE_INODE_IO_FAILURE, inode);
 	extent_io_tree_init(fs_info, &ei->file_extent_tree,
-			    IO_TREE_INODE_FILE_EXTENT, inode);
-	ei->io_tree.track_uptodate = true;
-	ei->io_failure_tree.track_uptodate = true;
+			    IO_TREE_INODE_FILE_EXTENT, NULL);
+	ei->io_failure_tree = RB_ROOT;
 	atomic_set(&ei->sync_writers, 0);
 	mutex_init(&ei->log_mutex);
 	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -8931,7 +8794,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 void btrfs_test_destroy_inode(struct inode *inode)
 {
-	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
+	btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false);
 	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
 #endif
@@ -8946,6 +8809,7 @@ void btrfs_destroy_inode(struct inode *vfs_inode)
 	struct btrfs_ordered_extent *ordered;
 	struct btrfs_inode *inode = BTRFS_I(vfs_inode);
 	struct btrfs_root *root = inode->root;
+	bool freespace_inode;

 	WARN_ON(!hlist_empty(&vfs_inode->i_dentry));
 	WARN_ON(vfs_inode->i_data.nrpages);
@@ -8967,6 +8831,12 @@ void btrfs_destroy_inode(struct inode *vfs_inode)
 	if (!root)
 		return;

+	/*
+	 * If this is a free space inode do not take the ordered extents lockdep
+	 * map.
+	 */
+	freespace_inode = btrfs_is_free_space_inode(inode);
+
 	while (1) {
 		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
 		if (!ordered)
@@ -8975,6 +8845,10 @@ void btrfs_destroy_inode(struct inode *vfs_inode)
 			btrfs_err(root->fs_info,
 				  "found ordered extent %llu %llu on inode cleanup",
 				  ordered->file_offset, ordered->num_bytes);
+
+			if (!freespace_inode)
+				btrfs_lockdep_acquire(root->fs_info, btrfs_ordered_extent);
+
 			btrfs_remove_ordered_extent(inode, ordered);
 			btrfs_put_ordered_extent(ordered);
 			btrfs_put_ordered_extent(ordered);
@@ -8982,7 +8856,7 @@ void btrfs_destroy_inode(struct inode *vfs_inode)
 	}
 	btrfs_qgroup_check_reserved_leak(inode);
 	inode_tree_del(inode);
-	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+	btrfs_drop_extent_map_range(inode, 0, (u64)-1, false);
 	btrfs_inode_clear_file_extent_range(inode, 0, (u64)-1);
 	btrfs_put_root(inode->root);
 }
@@ -9995,7 +9869,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 				       struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct extent_map *em;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_key ins;
@@ -10051,11 +9924,10 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 			break;
 		}

-		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
-					cur_offset + ins.offset -1, 0);
-
 		em = alloc_extent_map();
 		if (!em) {
+			btrfs_drop_extent_map_range(BTRFS_I(inode), cur_offset,
+					    cur_offset + ins.offset - 1, false);
 			btrfs_set_inode_full_sync(BTRFS_I(inode));
 			goto next;
 		}
@@ -10070,16 +9942,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
 		em->generation = trans->transid;

-		while (1) {
-			write_lock(&em_tree->lock);
-			ret = add_extent_mapping(em_tree, em, 1);
-			write_unlock(&em_tree->lock);
-			if (ret != -EEXIST)
-				break;
-			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
-						cur_offset + ins.offset - 1,
-						0);
-		}
+		ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, true);
 		free_extent_map(em);
 next:
 		num_bytes -= ins.offset;
@@ -10333,7 +10196,7 @@ static ssize_t btrfs_encoded_read_inline(
 	}
 	read_extent_buffer(leaf, tmp, ptr, count);
 	btrfs_release_path(path);
-	unlock_extent_cached(io_tree, start, lockend, cached_state);
+	unlock_extent(io_tree, start, lockend, cached_state);
 	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
 	*unlocked = true;

@@ -10358,7 +10221,7 @@ struct btrfs_encoded_read_private {
 static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
 					    struct bio *bio, int mirror_num)
 {
-	struct btrfs_encoded_read_private *priv = bio->bi_private;
+	struct btrfs_encoded_read_private *priv = btrfs_bio(bio)->private;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	blk_status_t ret;

@@ -10376,7 +10239,7 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
 static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
 {
 	const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
-	struct btrfs_encoded_read_private *priv = bbio->bio.bi_private;
+	struct btrfs_encoded_read_private *priv = bbio->private;
 	struct btrfs_inode *inode = priv->inode;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	u32 sectorsize = fs_info->sectorsize;
@@ -10404,10 +10267,9 @@ static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
 	return BLK_STS_OK;
 }

-static void btrfs_encoded_read_endio(struct bio *bio)
+static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
 {
-	struct btrfs_encoded_read_private *priv = bio->bi_private;
-	struct btrfs_bio *bbio = btrfs_bio(bio);
+	struct btrfs_encoded_read_private *priv = bbio->private;
 	blk_status_t status;

 	status = btrfs_encoded_read_verify_csum(bbio);
@@ -10425,7 +10287,7 @@ static void btrfs_encoded_read_endio(struct bio *bio)
 	if (!atomic_dec_return(&priv->pending))
 		wake_up(&priv->wait);
 	btrfs_bio_free_csum(bbio);
-	bio_put(bio);
+	bio_put(&bbio->bio);
 }

 int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
@@ -10472,12 +10334,11 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 			size_t bytes = min_t(u64, remaining, PAGE_SIZE);

 			if (!bio) {
-				bio = btrfs_bio_alloc(BIO_MAX_VECS);
+				bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ,
+						      btrfs_encoded_read_endio,
+						      &priv);
 				bio->bi_iter.bi_sector =
 					(disk_bytenr + cur) >> SECTOR_SHIFT;
-				bio->bi_end_io = btrfs_encoded_read_endio;
-				bio->bi_private = &priv;
-				bio->bi_opf = REQ_OP_READ;
 			}

 			if (!bytes ||
@@ -10538,7 +10399,7 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
 	if (ret)
 		goto out;

-	unlock_extent_cached(io_tree, start, lockend, cached_state);
+	unlock_extent(io_tree, start, lockend, cached_state);
 	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
 	*unlocked = true;

@@ -10608,13 +10469,13 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
 					       lockend - start + 1);
 		if (ret)
 			goto out_unlock_inode;
-		lock_extent_bits(io_tree, start, lockend, &cached_state);
+		lock_extent(io_tree, start, lockend, &cached_state);
 		ordered = btrfs_lookup_ordered_range(inode, start,
 						     lockend - start + 1);
 		if (!ordered)
 			break;
 		btrfs_put_ordered_extent(ordered);
-		unlock_extent_cached(io_tree, start, lockend, &cached_state);
+		unlock_extent(io_tree, start, lockend, &cached_state);
 		cond_resched();
 	}

@@ -10688,7 +10549,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
 	em = NULL;

 	if (disk_bytenr == EXTENT_MAP_HOLE) {
-		unlock_extent_cached(io_tree, start, lockend, &cached_state);
+		unlock_extent(io_tree, start, lockend, &cached_state);
 		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
 		unlocked = true;
 		ret = iov_iter_zero(count, iter);
@@ -10709,7 +10570,7 @@ out_em:
 	free_extent_map(em);
 out_unlock_extent:
 	if (!unlocked)
-		unlock_extent_cached(io_tree, start, lockend, &cached_state);
+		unlock_extent(io_tree, start, lockend, &cached_state);
 out_unlock_inode:
 	if (!unlocked)
 		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
@@ -10847,14 +10708,14 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
 						    end >> PAGE_SHIFT);
 		if (ret)
 			goto out_pages;
-		lock_extent_bits(io_tree, start, end, &cached_state);
+		lock_extent(io_tree, start, end, &cached_state);
 		ordered = btrfs_lookup_ordered_range(inode, start, num_bytes);
 		if (!ordered &&
 		    !filemap_range_has_page(inode->vfs_inode.i_mapping, start, end))
 			break;
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
-		unlock_extent_cached(io_tree, start, end, &cached_state);
+		unlock_extent(io_tree, start, end, &cached_state);
 		cond_resched();
 	}

@@ -10908,7 +10769,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
 				       (1 << BTRFS_ORDERED_COMPRESSED),
 				       compression);
 	if (ret) {
-		btrfs_drop_extent_cache(inode, start, end, 0);
+		btrfs_drop_extent_map_range(inode, start, end, false);
 		goto out_free_reserved;
 	}
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -10916,7 +10777,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
 	if (start + encoded->len > inode->vfs_inode.i_size)
 		i_size_write(&inode->vfs_inode, start + encoded->len);

-	unlock_extent_cached(io_tree, start, end, &cached_state);
+	unlock_extent(io_tree, start, end, &cached_state);

 	btrfs_delalloc_release_extents(inode, num_bytes);

@@ -10947,7 +10808,7 @@ out_free_data_space:
 	if (!extent_reserved)
 		btrfs_free_reserved_data_space_noquota(fs_info, disk_num_bytes);
 out_unlock:
-	unlock_extent_cached(io_tree, start, end, &cached_state);
+	unlock_extent(io_tree, start, end, &cached_state);
 out_pages:
 	for (i = 0; i < nr_pages; i++) {
 		if (pages[i])
@@ -11188,7 +11049,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,

 	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);

-	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
+	lock_extent(io_tree, 0, isize - 1, &cached_state);
 	start = 0;
 	while (start < isize) {
 		u64 logical_block_start, physical_block_start;
@@ -11229,7 +11090,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 		free_extent_map(em);
 		em = NULL;

-		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
+		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, false, true);
 		if (ret < 0) {
 			goto out;
 		} else if (ret) {
@@ -11325,7 +11186,7 @@ out:
 	if (!IS_ERR_OR_NULL(em))
 		free_extent_map(em);

-	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
+	unlock_extent(io_tree, 0, isize - 1, &cached_state);

 	if (ret)
 		btrfs_swap_deactivate(file);
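Most hunks above apply the same mechanical conversion: lock_extent_bits()/unlock_extent_cached() become lock_extent()/unlock_extent() with an explicit cached_state argument (NULL when no cached state is tracked), and btrfs_drop_extent_cache() becomes btrfs_drop_extent_map_range() with a bool replacing the old int flag. A minimal sketch of a caller after the conversion, using only helpers that appear in this diff; the function itself is hypothetical, for illustration only:

/*
 * Hypothetical helper, for illustration only: lock a file range, drop any
 * cached extent maps in it, and unlock, using the post-conversion API from
 * the hunks above.
 */
static void example_drop_range(struct btrfs_inode *inode, u64 start, u64 end)
{
	struct extent_state *cached_state = NULL;

	/* lock_extent() now takes the cached_state pointer directly. */
	lock_extent(&inode->io_tree, start, end, &cached_state);

	/*
	 * The trailing bool replaces the old int skip_pinned flag; false
	 * means pinned extent maps in the range are dropped as well.
	 */
	btrfs_drop_extent_map_range(inode, start, end, false);

	/* unlock_extent() absorbs the old unlock_extent_cached(). */
	unlock_extent(&inode->io_tree, start, end, &cached_state);
}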