diff options
Diffstat (limited to 'fs/btrfs/file.c')
| -rw-r--r-- | fs/btrfs/file.c | 128 | 
1 files changed, 21 insertions, 107 deletions
| diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 51e77d72068a..2be00e873e92 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -5,14 +5,11 @@  #include <linux/fs.h>  #include <linux/pagemap.h> -#include <linux/highmem.h>  #include <linux/time.h>  #include <linux/init.h>  #include <linux/string.h>  #include <linux/backing-dev.h> -#include <linux/mpage.h>  #include <linux/falloc.h> -#include <linux/swap.h>  #include <linux/writeback.h>  #include <linux/compat.h>  #include <linux/slab.h> @@ -83,7 +80,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,  static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,  				    struct inode_defrag *defrag)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = inode->root->fs_info;  	struct inode_defrag *entry;  	struct rb_node **p;  	struct rb_node *parent = NULL; @@ -135,8 +132,8 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)  int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,  			   struct btrfs_inode *inode)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);  	struct btrfs_root *root = inode->root; +	struct btrfs_fs_info *fs_info = root->fs_info;  	struct inode_defrag *defrag;  	u64 transid;  	int ret; @@ -185,7 +182,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,  static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,  				       struct inode_defrag *defrag)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = inode->root->fs_info;  	int ret;  	if (!__need_auto_defrag(fs_info)) @@ -833,8 +830,7 @@ next_slot:  				btrfs_file_extent_num_bytes(leaf, fi);  		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {  			extent_end = key.offset + -				btrfs_file_extent_inline_len(leaf, -						     path->slots[0], fi); +				btrfs_file_extent_ram_bytes(leaf, fi);  		} else {  			/* can't happen */  			BUG(); @@ -1133,7 
+1129,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,  int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,  			      struct btrfs_inode *inode, u64 start, u64 end)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = trans->fs_info;  	struct btrfs_root *root = inode->root;  	struct extent_buffer *leaf;  	struct btrfs_path *path; @@ -1470,7 +1466,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,  				u64 *lockstart, u64 *lockend,  				struct extent_state **cached_state)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = inode->root->fs_info;  	u64 start_pos;  	u64 last_pos;  	int i; @@ -1526,7 +1522,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,  static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,  				    size_t *write_bytes)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = inode->root->fs_info;  	struct btrfs_root *root = inode->root;  	struct btrfs_ordered_extent *ordered;  	u64 lockstart, lockend; @@ -1569,10 +1565,11 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,  	return ret;  } -static noinline ssize_t __btrfs_buffered_write(struct file *file, -					       struct iov_iter *i, -					       loff_t pos) +static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, +					       struct iov_iter *i)  { +	struct file *file = iocb->ki_filp; +	loff_t pos = iocb->ki_pos;  	struct inode *inode = file_inode(file);  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);  	struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1804,7 +1801,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)  {  	struct file *file = iocb->ki_filp;  	struct inode *inode = file_inode(file); -	loff_t pos = iocb->ki_pos; +	loff_t pos;  	ssize_t 
written;  	ssize_t written_buffered;  	loff_t endbyte; @@ -1815,8 +1812,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)  	if (written < 0 || !iov_iter_count(from))  		return written; -	pos += written; -	written_buffered = __btrfs_buffered_write(file, from, pos); +	pos = iocb->ki_pos; +	written_buffered = btrfs_buffered_write(iocb, from);  	if (written_buffered < 0) {  		err = written_buffered;  		goto out; @@ -1953,7 +1950,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,  	if (iocb->ki_flags & IOCB_DIRECT) {  		num_written = __btrfs_direct_write(iocb, from);  	} else { -		num_written = __btrfs_buffered_write(file, from, pos); +		num_written = btrfs_buffered_write(iocb, from);  		if (num_written > 0)  			iocb->ki_pos = pos + num_written;  		if (clean_page) @@ -2042,7 +2039,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	struct btrfs_trans_handle *trans;  	struct btrfs_log_ctx ctx;  	int ret = 0, err; -	bool full_sync = false;  	u64 len;  	/* @@ -2066,96 +2062,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	inode_lock(inode);  	atomic_inc(&root->log_batch); -	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, -			     &BTRFS_I(inode)->runtime_flags); +  	/* -	 * We might have have had more pages made dirty after calling -	 * start_ordered_ops and before acquiring the inode's i_mutex. +	 * We have to do this here to avoid the priority inversion of waiting on +	 * IO of a lower priority task while holding a transaction open.  	 */ -	if (full_sync) { -		/* -		 * For a full sync, we need to make sure any ordered operations -		 * start and finish before we start logging the inode, so that -		 * all extents are persisted and the respective file extent -		 * items are in the fs/subvol btree. -		 */ -		ret = btrfs_wait_ordered_range(inode, start, len); -	} else { -		/* -		 * Start any new ordered operations before starting to log the -		 * inode. 
We will wait for them to finish in btrfs_sync_log(). -		 * -		 * Right before acquiring the inode's mutex, we might have new -		 * writes dirtying pages, which won't immediately start the -		 * respective ordered operations - that is done through the -		 * fill_delalloc callbacks invoked from the writepage and -		 * writepages address space operations. So make sure we start -		 * all ordered operations before starting to log our inode. Not -		 * doing this means that while logging the inode, writeback -		 * could start and invoke writepage/writepages, which would call -		 * the fill_delalloc callbacks (cow_file_range, -		 * submit_compressed_extents). These callbacks add first an -		 * extent map to the modified list of extents and then create -		 * the respective ordered operation, which means in -		 * tree-log.c:btrfs_log_inode() we might capture all existing -		 * ordered operations (with btrfs_get_logged_extents()) before -		 * the fill_delalloc callback adds its ordered operation, and by -		 * the time we visit the modified list of extent maps (with -		 * btrfs_log_changed_extents()), we see and process the extent -		 * map they created. We then use the extent map to construct a -		 * file extent item for logging without waiting for the -		 * respective ordered operation to finish - this file extent -		 * item points to a disk location that might not have yet been -		 * written to, containing random data - so after a crash a log -		 * replay will make our inode have file extent items that point -		 * to disk locations containing invalid data, as we returned -		 * success to userspace without waiting for the respective -		 * ordered operation to finish, because it wasn't captured by -		 * btrfs_get_logged_extents(). 
-		 */ -		ret = start_ordered_ops(inode, start, end); -	} +	ret = btrfs_wait_ordered_range(inode, start, len);  	if (ret) {  		inode_unlock(inode);  		goto out;  	}  	atomic_inc(&root->log_batch); -	/* -	 * If the last transaction that changed this file was before the current -	 * transaction and we have the full sync flag set in our inode, we can -	 * bail out now without any syncing. -	 * -	 * Note that we can't bail out if the full sync flag isn't set. This is -	 * because when the full sync flag is set we start all ordered extents -	 * and wait for them to fully complete - when they complete they update -	 * the inode's last_trans field through: -	 * -	 *     btrfs_finish_ordered_io() -> -	 *         btrfs_update_inode_fallback() -> -	 *             btrfs_update_inode() -> -	 *                 btrfs_set_inode_last_trans() -	 * -	 * So we are sure that last_trans is up to date and can do this check to -	 * bail out safely. For the fast path, when the full sync flag is not -	 * set in our inode, we can not do it because we start only our ordered -	 * extents and don't wait for them to complete (that is when -	 * btrfs_finish_ordered_io runs), so here at this point their last_trans -	 * value might be less than or equals to fs_info->last_trans_committed, -	 * and setting a speculative last_trans for an inode when a buffered -	 * write is made (such as fs_info->generation + 1 for example) would not -	 * be reliable since after setting the value and before fsync is called -	 * any number of transactions can start and commit (transaction kthread -	 * commits the current transaction periodically), and a transaction -	 * commit does not start nor waits for ordered extents to complete. 
-	 */  	smp_mb();  	if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || -	    (full_sync && BTRFS_I(inode)->last_trans <= -	     fs_info->last_trans_committed) || -	    (!btrfs_have_ordered_extents_in_range(inode, start, len) && -	     BTRFS_I(inode)->last_trans -	     <= fs_info->last_trans_committed)) { +	    BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) {  		/*  		 * We've had everything committed since the last time we were  		 * modified so clear this flag in case it was set for whatever @@ -2239,13 +2160,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  				goto out;  			}  		} -		if (!full_sync) { -			ret = btrfs_wait_ordered_range(inode, start, len); -			if (ret) { -				btrfs_end_transaction(trans); -				goto out; -			} -		}  		ret = btrfs_commit_transaction(trans);  	} else {  		ret = btrfs_end_transaction(trans); @@ -2310,7 +2224,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,  		struct btrfs_inode *inode,  		struct btrfs_path *path, u64 offset, u64 end)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = trans->fs_info;  	struct btrfs_root *root = inode->root;  	struct extent_buffer *leaf;  	struct btrfs_file_extent_item *fi; |