diff options
Diffstat (limited to 'fs/btrfs/file.c')
| -rw-r--r-- | fs/btrfs/file.c | 435 | 
1 file changed, 288 insertions, 147 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58a18ed11546..8fe4eb7e5045 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -537,8 +537,8 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,  	 * we can set things up properly  	 */  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block, -			 EXTENT_DIRTY | EXTENT_DELALLOC | -			 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached); +			 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, +			 0, 0, cached);  	if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {  		if (start_pos >= isize && @@ -559,7 +559,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,  	}  	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, -					extra_bits, cached, 0); +					extra_bits, cached);  	if (err)  		return err; @@ -1882,10 +1882,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,  	u64 start_pos;  	u64 end_pos;  	ssize_t num_written = 0; -	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); +	const bool sync = iocb->ki_flags & IOCB_DSYNC;  	ssize_t err;  	loff_t pos; -	size_t count = iov_iter_count(from); +	size_t count;  	loff_t oldsize;  	int clean_page = 0; @@ -1906,6 +1906,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,  	}  	pos = iocb->ki_pos; +	count = iov_iter_count(from);  	if (iocb->ki_flags & IOCB_NOWAIT) {  		/*  		 * We will allocate space in case nodatacow is not set, @@ -2439,27 +2440,286 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,  	return 0;  } +static int btrfs_insert_clone_extent(struct btrfs_trans_handle *trans, +				     struct inode *inode, +				     struct btrfs_path *path, +				     struct btrfs_clone_extent_info *clone_info, +				     const u64 clone_len) +{ +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); +	struct btrfs_root *root = BTRFS_I(inode)->root; +	struct btrfs_file_extent_item *extent; +	struct extent_buffer *leaf; +	struct btrfs_key key; +	int slot; 
+	struct btrfs_ref ref = { 0 }; +	u64 ref_offset; +	int ret; + +	if (clone_len == 0) +		return 0; + +	if (clone_info->disk_offset == 0 && +	    btrfs_fs_incompat(fs_info, NO_HOLES)) +		return 0; + +	key.objectid = btrfs_ino(BTRFS_I(inode)); +	key.type = BTRFS_EXTENT_DATA_KEY; +	key.offset = clone_info->file_offset; +	ret = btrfs_insert_empty_item(trans, root, path, &key, +				      clone_info->item_size); +	if (ret) +		return ret; +	leaf = path->nodes[0]; +	slot = path->slots[0]; +	write_extent_buffer(leaf, clone_info->extent_buf, +			    btrfs_item_ptr_offset(leaf, slot), +			    clone_info->item_size); +	extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); +	btrfs_set_file_extent_offset(leaf, extent, clone_info->data_offset); +	btrfs_set_file_extent_num_bytes(leaf, extent, clone_len); +	btrfs_mark_buffer_dirty(leaf); +	btrfs_release_path(path); + +	/* If it's a hole, nothing more needs to be done. */ +	if (clone_info->disk_offset == 0) +		return 0; + +	inode_add_bytes(inode, clone_len); +	btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, +			       clone_info->disk_offset, +			       clone_info->disk_len, 0); +	ref_offset = clone_info->file_offset - clone_info->data_offset; +	btrfs_init_data_ref(&ref, root->root_key.objectid, +			    btrfs_ino(BTRFS_I(inode)), ref_offset); +	ret = btrfs_inc_extent_ref(trans, &ref); + +	return ret; +} + +/* + * The respective range must have been previously locked, as well as the inode. + * The end offset is inclusive (last byte of the range). + * @clone_info is NULL for fallocate's hole punching and non-NULL for extent + * cloning. + * When cloning, we don't want to end up in a state where we dropped extents + * without inserting a new one, so we must abort the transaction to avoid a + * corruption. 
+ */ +int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, +			   const u64 start, const u64 end, +			   struct btrfs_clone_extent_info *clone_info, +			   struct btrfs_trans_handle **trans_out) +{ +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); +	u64 min_size = btrfs_calc_insert_metadata_size(fs_info, 1); +	u64 ino_size = round_up(inode->i_size, fs_info->sectorsize); +	struct btrfs_root *root = BTRFS_I(inode)->root; +	struct btrfs_trans_handle *trans = NULL; +	struct btrfs_block_rsv *rsv; +	unsigned int rsv_count; +	u64 cur_offset; +	u64 drop_end; +	u64 len = end - start; +	int ret = 0; + +	if (end <= start) +		return -EINVAL; + +	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); +	if (!rsv) { +		ret = -ENOMEM; +		goto out; +	} +	rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1); +	rsv->failfast = 1; + +	/* +	 * 1 - update the inode +	 * 1 - removing the extents in the range +	 * 1 - adding the hole extent if no_holes isn't set or if we are cloning +	 *     an extent +	 */ +	if (!btrfs_fs_incompat(fs_info, NO_HOLES) || clone_info) +		rsv_count = 3; +	else +		rsv_count = 2; + +	trans = btrfs_start_transaction(root, rsv_count); +	if (IS_ERR(trans)) { +		ret = PTR_ERR(trans); +		trans = NULL; +		goto out_free; +	} + +	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, +				      min_size, false); +	BUG_ON(ret); +	trans->block_rsv = rsv; + +	cur_offset = start; +	while (cur_offset < end) { +		ret = __btrfs_drop_extents(trans, root, inode, path, +					   cur_offset, end + 1, &drop_end, +					   1, 0, 0, NULL); +		if (ret != -ENOSPC) { +			/* +			 * When cloning we want to avoid transaction aborts when +			 * nothing was done and we are attempting to clone parts +			 * of inline extents, in such cases -EOPNOTSUPP is +			 * returned by __btrfs_drop_extents() without having +			 * changed anything in the file. 
+			 */ +			if (clone_info && ret && ret != -EOPNOTSUPP) +				btrfs_abort_transaction(trans, ret); +			break; +		} + +		trans->block_rsv = &fs_info->trans_block_rsv; + +		if (!clone_info && cur_offset < drop_end && +		    cur_offset < ino_size) { +			ret = fill_holes(trans, BTRFS_I(inode), path, +					cur_offset, drop_end); +			if (ret) { +				/* +				 * If we failed then we didn't insert our hole +				 * entries for the area we dropped, so now the +				 * fs is corrupted, so we must abort the +				 * transaction. +				 */ +				btrfs_abort_transaction(trans, ret); +				break; +			} +		} + +		if (clone_info) { +			u64 clone_len = drop_end - cur_offset; + +			ret = btrfs_insert_clone_extent(trans, inode, path, +							clone_info, clone_len); +			if (ret) { +				btrfs_abort_transaction(trans, ret); +				break; +			} +			clone_info->data_len -= clone_len; +			clone_info->data_offset += clone_len; +			clone_info->file_offset += clone_len; +		} + +		cur_offset = drop_end; + +		ret = btrfs_update_inode(trans, root, inode); +		if (ret) +			break; + +		btrfs_end_transaction(trans); +		btrfs_btree_balance_dirty(fs_info); + +		trans = btrfs_start_transaction(root, rsv_count); +		if (IS_ERR(trans)) { +			ret = PTR_ERR(trans); +			trans = NULL; +			break; +		} + +		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, +					      rsv, min_size, false); +		BUG_ON(ret);	/* shouldn't happen */ +		trans->block_rsv = rsv; + +		if (!clone_info) { +			ret = find_first_non_hole(inode, &cur_offset, &len); +			if (unlikely(ret < 0)) +				break; +			if (ret && !len) { +				ret = 0; +				break; +			} +		} +	} + +	/* +	 * If we were cloning, force the next fsync to be a full one since we +	 * we replaced (or just dropped in the case of cloning holes when +	 * NO_HOLES is enabled) extents and extent maps. 
+	 * This is for the sake of simplicity, and cloning into files larger +	 * than 16Mb would force the full fsync any way (when +	 * try_release_extent_mapping() is invoked during page cache truncation. +	 */ +	if (clone_info) +		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, +			&BTRFS_I(inode)->runtime_flags); + +	if (ret) +		goto out_trans; + +	trans->block_rsv = &fs_info->trans_block_rsv; +	/* +	 * If we are using the NO_HOLES feature we might have had already an +	 * hole that overlaps a part of the region [lockstart, lockend] and +	 * ends at (or beyond) lockend. Since we have no file extent items to +	 * represent holes, drop_end can be less than lockend and so we must +	 * make sure we have an extent map representing the existing hole (the +	 * call to __btrfs_drop_extents() might have dropped the existing extent +	 * map representing the existing hole), otherwise the fast fsync path +	 * will not record the existence of the hole region +	 * [existing_hole_start, lockend]. +	 */ +	if (drop_end <= end) +		drop_end = end + 1; +	/* +	 * Don't insert file hole extent item if it's for a range beyond eof +	 * (because it's useless) or if it represents a 0 bytes range (when +	 * cur_offset == drop_end). +	 */ +	if (!clone_info && cur_offset < ino_size && cur_offset < drop_end) { +		ret = fill_holes(trans, BTRFS_I(inode), path, +				cur_offset, drop_end); +		if (ret) { +			/* Same comment as above. 
*/ +			btrfs_abort_transaction(trans, ret); +			goto out_trans; +		} +	} +	if (clone_info) { +		ret = btrfs_insert_clone_extent(trans, inode, path, clone_info, +						clone_info->data_len); +		if (ret) { +			btrfs_abort_transaction(trans, ret); +			goto out_trans; +		} +	} + +out_trans: +	if (!trans) +		goto out_free; + +	trans->block_rsv = &fs_info->trans_block_rsv; +	if (ret) +		btrfs_end_transaction(trans); +	else +		*trans_out = trans; +out_free: +	btrfs_free_block_rsv(fs_info, rsv); +out: +	return ret; +} +  static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  {  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct extent_state *cached_state = NULL;  	struct btrfs_path *path; -	struct btrfs_block_rsv *rsv; -	struct btrfs_trans_handle *trans; +	struct btrfs_trans_handle *trans = NULL;  	u64 lockstart;  	u64 lockend;  	u64 tail_start;  	u64 tail_len;  	u64 orig_start = offset; -	u64 cur_offset; -	u64 min_size = btrfs_calc_trans_metadata_size(fs_info, 1); -	u64 drop_end;  	int ret = 0; -	int err = 0; -	unsigned int rsv_count;  	bool same_block; -	bool no_holes = btrfs_fs_incompat(fs_info, NO_HOLES);  	u64 ino_size;  	bool truncated_block = false;  	bool updated_inode = false; @@ -2566,145 +2826,24 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  		goto out;  	} -	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); -	if (!rsv) { -		ret = -ENOMEM; -		goto out_free; -	} -	rsv->size = btrfs_calc_trans_metadata_size(fs_info, 1); -	rsv->failfast = 1; - -	/* -	 * 1 - update the inode -	 * 1 - removing the extents in the range -	 * 1 - adding the hole extent if no_holes isn't set -	 */ -	rsv_count = no_holes ? 
2 : 3; -	trans = btrfs_start_transaction(root, rsv_count); -	if (IS_ERR(trans)) { -		err = PTR_ERR(trans); -		goto out_free; -	} - -	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, -				      min_size, false); -	BUG_ON(ret); -	trans->block_rsv = rsv; - -	cur_offset = lockstart; -	len = lockend - cur_offset; -	while (cur_offset < lockend) { -		ret = __btrfs_drop_extents(trans, root, inode, path, -					   cur_offset, lockend + 1, -					   &drop_end, 1, 0, 0, NULL); -		if (ret != -ENOSPC) -			break; - -		trans->block_rsv = &fs_info->trans_block_rsv; - -		if (cur_offset < drop_end && cur_offset < ino_size) { -			ret = fill_holes(trans, BTRFS_I(inode), path, -					cur_offset, drop_end); -			if (ret) { -				/* -				 * If we failed then we didn't insert our hole -				 * entries for the area we dropped, so now the -				 * fs is corrupted, so we must abort the -				 * transaction. -				 */ -				btrfs_abort_transaction(trans, ret); -				err = ret; -				break; -			} -		} - -		cur_offset = drop_end; - -		ret = btrfs_update_inode(trans, root, inode); -		if (ret) { -			err = ret; -			break; -		} - -		btrfs_end_transaction(trans); -		btrfs_btree_balance_dirty(fs_info); - -		trans = btrfs_start_transaction(root, rsv_count); -		if (IS_ERR(trans)) { -			ret = PTR_ERR(trans); -			trans = NULL; -			break; -		} - -		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, -					      rsv, min_size, false); -		BUG_ON(ret);	/* shouldn't happen */ -		trans->block_rsv = rsv; - -		ret = find_first_non_hole(inode, &cur_offset, &len); -		if (unlikely(ret < 0)) -			break; -		if (ret && !len) { -			ret = 0; -			break; -		} -	} - -	if (ret) { -		err = ret; -		goto out_trans; -	} - -	trans->block_rsv = &fs_info->trans_block_rsv; -	/* -	 * If we are using the NO_HOLES feature we might have had already an -	 * hole that overlaps a part of the region [lockstart, lockend] and -	 * ends at (or beyond) lockend. 
Since we have no file extent items to -	 * represent holes, drop_end can be less than lockend and so we must -	 * make sure we have an extent map representing the existing hole (the -	 * call to __btrfs_drop_extents() might have dropped the existing extent -	 * map representing the existing hole), otherwise the fast fsync path -	 * will not record the existence of the hole region -	 * [existing_hole_start, lockend]. -	 */ -	if (drop_end <= lockend) -		drop_end = lockend + 1; -	/* -	 * Don't insert file hole extent item if it's for a range beyond eof -	 * (because it's useless) or if it represents a 0 bytes range (when -	 * cur_offset == drop_end). -	 */ -	if (cur_offset < ino_size && cur_offset < drop_end) { -		ret = fill_holes(trans, BTRFS_I(inode), path, -				cur_offset, drop_end); -		if (ret) { -			/* Same comment as above. */ -			btrfs_abort_transaction(trans, ret); -			err = ret; -			goto out_trans; -		} -	} - -out_trans: -	if (!trans) -		goto out_free; +	ret = btrfs_punch_hole_range(inode, path, lockstart, lockend, NULL, +				     &trans); +	btrfs_free_path(path); +	if (ret) +		goto out; +	ASSERT(trans != NULL);  	inode_inc_iversion(inode);  	inode->i_mtime = inode->i_ctime = current_time(inode); - -	trans->block_rsv = &fs_info->trans_block_rsv;  	ret = btrfs_update_inode(trans, root, inode);  	updated_inode = true;  	btrfs_end_transaction(trans);  	btrfs_btree_balance_dirty(fs_info); -out_free: -	btrfs_free_path(path); -	btrfs_free_block_rsv(fs_info, rsv);  out:  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,  			     &cached_state);  out_only_mutex: -	if (!updated_inode && truncated_block && !ret && !err) { +	if (!updated_inode && truncated_block && !ret) {  		/*  		 * If we only end up zeroing part of a page, we still need to  		 * update the inode item, so that all the time fields are @@ -2719,16 +2858,18 @@ out_only_mutex:  		inode->i_ctime = now;  		trans = btrfs_start_transaction(root, 1);  		if (IS_ERR(trans)) { -			err = 
PTR_ERR(trans); +			ret = PTR_ERR(trans);  		} else { -			err = btrfs_update_inode(trans, root, inode); -			ret = btrfs_end_transaction(trans); +			int ret2; + +			ret = btrfs_update_inode(trans, root, inode); +			ret2 = btrfs_end_transaction(trans); +			if (!ret) +				ret = ret2;  		}  	}  	inode_unlock(inode); -	if (ret && !err) -		err = ret; -	return err; +	return ret;  }  /* Helper structure to record which range is already reserved */  |