diff options
Diffstat (limited to 'fs/btrfs/file.c')
| -rw-r--r-- | fs/btrfs/file.c | 216 | 
1 file changed, 154 insertions, 62 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb742c07e7a4..1f2b99cb55ea 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -40,6 +40,7 @@  #include "tree-log.h"  #include "locking.h"  #include "volumes.h" +#include "qgroup.h"  static struct kmem_cache *btrfs_inode_defrag_cachep;  /* @@ -447,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,  		write_bytes -= copied;  		total_copied += copied; -		/* Return to btrfs_file_aio_write to fault page */ +		/* Return to btrfs_file_write_iter to fault page */  		if (unlikely(copied == 0))  			break; @@ -470,11 +471,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)  	for (i = 0; i < num_pages; i++) {  		/* page checked is some magic around finding pages that  		 * have been modified without going through btrfs_set_page_dirty -		 * clear it here +		 * clear it here. There should be no need to mark the pages +		 * accessed as prepare_pages should have marked them accessed +		 * in prepare_pages via find_or_create_page()  		 */  		ClearPageChecked(pages[i]);  		unlock_page(pages[i]); -		mark_page_accessed(pages[i]);  		page_cache_release(pages[i]);  	}  } @@ -714,7 +716,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,  	int recow;  	int ret;  	int modify_tree = -1; -	int update_refs = (root->ref_cows || root == root->fs_info->tree_root); +	int update_refs;  	int found = 0;  	int leafs_visited = 0; @@ -724,6 +726,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,  	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)  		modify_tree = 0; +	update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || +		       root == root->fs_info->tree_root);  	while (1) {  		recow = 0;  		ret = btrfs_lookup_file_extent(trans, root, path, ino, @@ -780,6 +784,18 @@ next_slot:  			extent_end = search_start;  		} +		/* +		 * Don't skip extent items representing 0 byte lengths. 
They +		 * used to be created (bug) if while punching holes we hit +		 * -ENOSPC condition. So if we find one here, just ensure we +		 * delete it, otherwise we would insert a new file extent item +		 * with the same key (offset) as that 0 bytes length file +		 * extent item in the call to setup_items_for_insert() later +		 * in this function. +		 */ +		if (extent_end == key.offset && extent_end >= search_start) +			goto delete_extent_item; +  		if (extent_end <= search_start) {  			path->slots[0]++;  			goto next_slot; @@ -800,7 +816,7 @@ next_slot:  		if (start > key.offset && end < extent_end) {  			BUG_ON(del_nr > 0);  			if (extent_type == BTRFS_FILE_EXTENT_INLINE) { -				ret = -EINVAL; +				ret = -EOPNOTSUPP;  				break;  			} @@ -835,7 +851,7 @@ next_slot:  						disk_bytenr, num_bytes, 0,  						root->root_key.objectid,  						new_key.objectid, -						start - extent_offset, 0); +						start - extent_offset, 1);  				BUG_ON(ret); /* -ENOMEM */  			}  			key.offset = start; @@ -846,7 +862,7 @@ next_slot:  		 */  		if (start <= key.offset && end < extent_end) {  			if (extent_type == BTRFS_FILE_EXTENT_INLINE) { -				ret = -EINVAL; +				ret = -EOPNOTSUPP;  				break;  			} @@ -872,7 +888,7 @@ next_slot:  		if (start > key.offset && end >= extent_end) {  			BUG_ON(del_nr > 0);  			if (extent_type == BTRFS_FILE_EXTENT_INLINE) { -				ret = -EINVAL; +				ret = -EOPNOTSUPP;  				break;  			} @@ -893,6 +909,7 @@ next_slot:  		 *    | ------ extent ------ |  		 */  		if (start <= key.offset && end >= extent_end) { +delete_extent_item:  			if (del_nr == 0) {  				del_slot = path->slots[0];  				del_nr = 1; @@ -1191,7 +1208,7 @@ again:  		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,  					   root->root_key.objectid, -					   ino, orig_offset, 0); +					   ino, orig_offset, 1);  		BUG_ON(ret); /* -ENOMEM */  		if (split == start) { @@ -1658,27 +1675,22 @@ again:  }  static ssize_t __btrfs_direct_write(struct kiocb *iocb, -				    const struct iovec *iov, 
-				    unsigned long nr_segs, loff_t pos, -				    size_t count, size_t ocount) +				    struct iov_iter *from, +				    loff_t pos)  {  	struct file *file = iocb->ki_filp; -	struct iov_iter i;  	ssize_t written;  	ssize_t written_buffered;  	loff_t endbyte;  	int err; -	written = generic_file_direct_write(iocb, iov, &nr_segs, pos, -					    count, ocount); +	written = generic_file_direct_write(iocb, from, pos); -	if (written < 0 || written == count) +	if (written < 0 || !iov_iter_count(from))  		return written;  	pos += written; -	count -= written; -	iov_iter_init(&i, iov, nr_segs, count, written); -	written_buffered = __btrfs_buffered_write(file, &i, pos); +	written_buffered = __btrfs_buffered_write(file, from, pos);  	if (written_buffered < 0) {  		err = written_buffered;  		goto out; @@ -1713,9 +1725,8 @@ static void update_time_for_write(struct inode *inode)  		inode_inc_iversion(inode);  } -static ssize_t btrfs_file_aio_write(struct kiocb *iocb, -				    const struct iovec *iov, -				    unsigned long nr_segs, loff_t pos) +static ssize_t btrfs_file_write_iter(struct kiocb *iocb, +				    struct iov_iter *from)  {  	struct file *file = iocb->ki_filp;  	struct inode *inode = file_inode(file); @@ -1724,18 +1735,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,  	u64 end_pos;  	ssize_t num_written = 0;  	ssize_t err = 0; -	size_t count, ocount; +	size_t count = iov_iter_count(from);  	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); +	loff_t pos = iocb->ki_pos;  	mutex_lock(&inode->i_mutex); -	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); -	if (err) { -		mutex_unlock(&inode->i_mutex); -		goto out; -	} -	count = ocount; -  	current->backing_dev_info = inode->i_mapping->backing_dev_info;  	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));  	if (err) { @@ -1748,6 +1753,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,  		goto out;  	} +	iov_iter_truncate(from, count); +  	err 
= file_remove_suid(file);  	if (err) {  		mutex_unlock(&inode->i_mutex); @@ -1777,7 +1784,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,  	start_pos = round_down(pos, root->sectorsize);  	if (start_pos > i_size_read(inode)) {  		/* Expand hole size to cover write data, preventing empty gap */ -		end_pos = round_up(pos + iov->iov_len, root->sectorsize); +		end_pos = round_up(pos + count, root->sectorsize);  		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);  		if (err) {  			mutex_unlock(&inode->i_mutex); @@ -1789,14 +1796,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,  		atomic_inc(&BTRFS_I(inode)->sync_writers);  	if (unlikely(file->f_flags & O_DIRECT)) { -		num_written = __btrfs_direct_write(iocb, iov, nr_segs, -						   pos, count, ocount); +		num_written = __btrfs_direct_write(iocb, from, pos);  	} else { -		struct iov_iter i; - -		iov_iter_init(&i, iov, nr_segs, count, num_written); - -		num_written = __btrfs_buffered_write(file, &i, pos); +		num_written = __btrfs_buffered_write(file, from, pos);  		if (num_written > 0)  			iocb->ki_pos = pos + num_written;  	} @@ -2009,8 +2011,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  		if (!full_sync) {  			ret = btrfs_wait_ordered_range(inode, start,  						       end - start + 1); -			if (ret) +			if (ret) { +				btrfs_end_transaction(trans, root);  				goto out; +			}  		}  		ret = btrfs_commit_transaction(trans, root);  	} else { @@ -2168,6 +2172,37 @@ out:  	return 0;  } +/* + * Find a hole extent on given inode and change start/len to the end of hole + * extent.(hole/vacuum extent whose em->start <= start && + *	   em->start + em->len > start) + * When a hole extent is found, return 1 and modify start/len. 
+ */ +static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) +{ +	struct extent_map *em; +	int ret = 0; + +	em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0); +	if (IS_ERR_OR_NULL(em)) { +		if (!em) +			ret = -ENOMEM; +		else +			ret = PTR_ERR(em); +		return ret; +	} + +	/* Hole or vacuum extent(only exists in no-hole mode) */ +	if (em->block_start == EXTENT_MAP_HOLE) { +		ret = 1; +		*len = em->start + em->len > *start + *len ? +		       0 : *start + *len - em->start - em->len; +		*start = em->start + em->len; +	} +	free_extent_map(em); +	return ret; +} +  static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  {  	struct btrfs_root *root = BTRFS_I(inode)->root; @@ -2175,25 +2210,42 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	struct btrfs_path *path;  	struct btrfs_block_rsv *rsv;  	struct btrfs_trans_handle *trans; -	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); -	u64 lockend = round_down(offset + len, -				 BTRFS_I(inode)->root->sectorsize) - 1; -	u64 cur_offset = lockstart; +	u64 lockstart; +	u64 lockend; +	u64 tail_start; +	u64 tail_len; +	u64 orig_start = offset; +	u64 cur_offset;  	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);  	u64 drop_end;  	int ret = 0;  	int err = 0;  	int rsv_count; -	bool same_page = ((offset >> PAGE_CACHE_SHIFT) == -			  ((offset + len - 1) >> PAGE_CACHE_SHIFT)); +	bool same_page;  	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); -	u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); +	u64 ino_size;  	ret = btrfs_wait_ordered_range(inode, offset, len);  	if (ret)  		return ret;  	mutex_lock(&inode->i_mutex); +	ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); +	ret = find_first_non_hole(inode, &offset, &len); +	if (ret < 0) +		goto out_only_mutex; +	if (ret && !len) { +		/* Already in a large hole */ +		ret = 0; +		goto out_only_mutex; +	} + +	lockstart = round_up(offset , 
BTRFS_I(inode)->root->sectorsize); +	lockend = round_down(offset + len, +			     BTRFS_I(inode)->root->sectorsize) - 1; +	same_page = ((offset >> PAGE_CACHE_SHIFT) == +		    ((offset + len - 1) >> PAGE_CACHE_SHIFT)); +  	/*  	 * We needn't truncate any page which is beyond the end of the file  	 * because we are sure there is no data there. @@ -2205,8 +2257,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	if (same_page && len < PAGE_CACHE_SIZE) {  		if (offset < ino_size)  			ret = btrfs_truncate_page(inode, offset, len, 0); -		mutex_unlock(&inode->i_mutex); -		return ret; +		goto out_only_mutex;  	}  	/* zero back part of the first page */ @@ -2218,12 +2269,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  		}  	} -	/* zero the front end of the last page */ -	if (offset + len < ino_size) { -		ret = btrfs_truncate_page(inode, offset + len, 0, 1); -		if (ret) { -			mutex_unlock(&inode->i_mutex); -			return ret; +	/* Check the aligned pages after the first unaligned page, +	 * if offset != orig_start, which means the first unaligned page +	 * including several following pages are already in holes, +	 * the extra check can be skipped */ +	if (offset == orig_start) { +		/* after truncate page, check hole again */ +		len = offset + len - lockstart; +		offset = lockstart; +		ret = find_first_non_hole(inode, &offset, &len); +		if (ret < 0) +			goto out_only_mutex; +		if (ret && !len) { +			ret = 0; +			goto out_only_mutex; +		} +		lockstart = offset; +	} + +	/* Check the tail unaligned part is in a hole */ +	tail_start = lockend + 1; +	tail_len = offset + len - tail_start; +	if (tail_len) { +		ret = find_first_non_hole(inode, &tail_start, &tail_len); +		if (unlikely(ret < 0)) +			goto out_only_mutex; +		if (!ret) { +			/* zero the front end of the last page */ +			if (tail_start + tail_len < ino_size) { +				ret = btrfs_truncate_page(inode, +						tail_start + tail_len, 0, 1); +				if (ret) +					goto 
out_only_mutex; +				}  		}  	} @@ -2249,9 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  		if ((!ordered ||  		    (ordered->file_offset + ordered->len <= lockstart ||  		     ordered->file_offset > lockend)) && -		     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, -				     lockend, EXTENT_UPTODATE, 0, -				     cached_state)) { +		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {  			if (ordered)  				btrfs_put_ordered_extent(ordered);  			break; @@ -2299,6 +2375,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	BUG_ON(ret);  	trans->block_rsv = rsv; +	cur_offset = lockstart; +	len = lockend - cur_offset;  	while (cur_offset < lockend) {  		ret = __btrfs_drop_extents(trans, root, inode, path,  					   cur_offset, lockend + 1, @@ -2339,6 +2417,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  					      rsv, min_size);  		BUG_ON(ret);	/* shouldn't happen */  		trans->block_rsv = rsv; + +		ret = find_first_non_hole(inode, &cur_offset, &len); +		if (unlikely(ret < 0)) +			break; +		if (ret && !len) { +			ret = 0; +			break; +		}  	}  	if (ret) { @@ -2347,7 +2433,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	}  	trans->block_rsv = &root->fs_info->trans_block_rsv; -	if (cur_offset < ino_size) { +	/* +	 * Don't insert file hole extent item if it's for a range beyond eof +	 * (because it's useless) or if it represents a 0 bytes range (when +	 * cur_offset == drop_end). 
+	 */ +	if (cur_offset < ino_size && cur_offset < drop_end) {  		ret = fill_holes(trans, inode, path, cur_offset, drop_end);  		if (ret) {  			err = ret; @@ -2372,6 +2463,7 @@ out_free:  out:  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,  			     &cached_state, GFP_NOFS); +out_only_mutex:  	mutex_unlock(&inode->i_mutex);  	if (ret && !err)  		err = ret; @@ -2633,11 +2725,11 @@ out:  const struct file_operations btrfs_file_operations = {  	.llseek		= btrfs_file_llseek, -	.read		= do_sync_read, -	.write		= do_sync_write, -	.aio_read       = generic_file_aio_read, +	.read		= new_sync_read, +	.write		= new_sync_write, +	.read_iter      = generic_file_read_iter,  	.splice_read	= generic_file_splice_read, -	.aio_write	= btrfs_file_aio_write, +	.write_iter	= btrfs_file_write_iter,  	.mmap		= btrfs_file_mmap,  	.open		= generic_file_open,  	.release	= btrfs_release_file,  |