Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r--	fs/xfs/xfs_aops.c	309
1 file changed, 16 insertions, 293 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3e57a56cf829..0f56fcd3a5d5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -37,11 +37,6 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
-/* flags for direct write completions */
-#define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
-#define XFS_DIO_FLAG_APPEND	(1 << 1)
-#define XFS_DIO_FLAG_COW	(1 << 2)
-
 /*
  * structure owned by writepages passed to individual writepage calls
  */
@@ -495,8 +490,8 @@ xfs_submit_ioend(
 
 	ioend->io_bio->bi_private = ioend;
 	ioend->io_bio->bi_end_io = xfs_end_bio;
-	bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
-			 (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+
 	/*
 	 * If we are failing the IO now, just mark the ioend with an
 	 * error and finish it. This will run IO completion immediately
@@ -567,8 +562,7 @@ xfs_chain_bio(
 {
 	bio_chain(ioend->io_bio, new);
 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
-	bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
-			  (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 	submit_bio(ioend->io_bio);
 	ioend->io_bio = new;
 }
@@ -777,7 +771,7 @@ xfs_map_cow(
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_bmbt_irec	imap;
-	bool			is_cow = false, need_alloc = false;
+	bool			is_cow = false;
 	int			error;
 
 	/*
@@ -795,7 +789,7 @@ xfs_map_cow(
 	 * Else we need to check if there is a COW mapping at this offset.
 	 */
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
+	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 	if (!is_cow)
@@ -805,7 +799,7 @@ xfs_map_cow(
 	 * And if the COW mapping has a delayed extent here we need to
 	 * allocate real space for it now.
 	 */
-	if (need_alloc) {
+	if (isnullstartblock(imap.br_startblock)) {
 		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
 				&imap);
 		if (error)
@@ -1176,45 +1170,6 @@ xfs_vm_releasepage(
 }
 
 /*
- * When we map a DIO buffer, we may need to pass flags to
- * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
- *
- * Note that for DIO, an IO to the highest supported file block offset (i.e.
- * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
- * bit variable. Hence if we see this overflow, we have to assume that the IO is
- * extending the file size. We won't know for sure until IO completion is run
- * and the actual max write offset is communicated to the IO completion
- * routine.
- */
-static void
-xfs_map_direct(
-	struct inode		*inode,
-	struct buffer_head	*bh_result,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset,
-	bool			is_cow)
-{
-	uintptr_t		*flags = (uintptr_t *)&bh_result->b_private;
-	xfs_off_t		size = bh_result->b_size;
-
-	trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
-		ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
-		XFS_IO_OVERWRITE, imap);
-
-	if (ISUNWRITTEN(imap)) {
-		*flags |= XFS_DIO_FLAG_UNWRITTEN;
-		set_buffer_defer_completion(bh_result);
-	} else if (is_cow) {
-		*flags |= XFS_DIO_FLAG_COW;
-		set_buffer_defer_completion(bh_result);
-	}
-	if (offset + size > i_size_read(inode) || offset + size < 0) {
-		*flags |= XFS_DIO_FLAG_APPEND;
-		set_buffer_defer_completion(bh_result);
-	}
-}
-
-/*
  * If this is O_DIRECT or the mpage code calling tell them how large the mapping
  * is, so that we can avoid repeated get_blocks calls.
  *
@@ -1254,52 +1209,12 @@ xfs_map_trim_size(
 	bh_result->b_size = mapping_size;
 }
 
-/* Bounce unaligned directio writes to the page cache. */
 static int
-xfs_bounce_unaligned_dio_write(
-	struct xfs_inode	*ip,
-	xfs_fileoff_t		offset_fsb,
-	struct xfs_bmbt_irec	*imap)
-{
-	struct xfs_bmbt_irec	irec;
-	xfs_fileoff_t		delta;
-	bool			shared;
-	bool			x;
-	int			error;
-
-	irec = *imap;
-	if (offset_fsb > irec.br_startoff) {
-		delta = offset_fsb - irec.br_startoff;
-		irec.br_blockcount -= delta;
-		irec.br_startblock += delta;
-		irec.br_startoff = offset_fsb;
-	}
-	error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
-	if (error)
-		return error;
-
-	/*
-	 * We're here because we're trying to do a directio write to a
-	 * region that isn't aligned to a filesystem block.  If any part
-	 * of the extent is shared, fall back to buffered mode to handle
-	 * the RMW.  This is done by returning -EREMCHG ("remote addr
-	 * changed"), which is caught further up the call stack.
-	 */
-	if (shared) {
-		trace_xfs_reflink_bounce_dio_write(ip, imap);
-		return -EREMCHG;
-	}
-	return 0;
-}
-
-STATIC int
-__xfs_get_blocks(
+xfs_get_blocks(
 	struct inode		*inode,
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
-	int			create,
-	bool			direct,
-	bool			dax_fault)
+	int			create)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1310,11 +1225,8 @@ __xfs_get_blocks(
 	int			nimaps = 1;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			new = 0;
-	bool			is_cow = false;
-	bool			need_alloc = false;
 
-	BUG_ON(create && !direct);
+	BUG_ON(create);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -1323,7 +1235,7 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && offset >= i_size_read(inode))
+	if (offset >= i_size_read(inode))
 		return 0;
 
 	/*
@@ -1338,52 +1250,12 @@ __xfs_get_blocks(
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-	if (create && direct && xfs_is_reflink_inode(ip))
-		is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
-					&need_alloc);
-	if (!is_cow) {
-		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-					&imap, &nimaps, XFS_BMAPI_ENTIRE);
-		/*
-		 * Truncate an overwrite extent if there's a pending CoW
-		 * reservation before the end of this extent.  This
-		 * forces us to come back to get_blocks to take care of
-		 * the CoW.
-		 */
-		if (create && direct && nimaps &&
-		    imap.br_startblock != HOLESTARTBLOCK &&
-		    imap.br_startblock != DELAYSTARTBLOCK &&
-		    !ISUNWRITTEN(&imap))
-			xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
-					&imap);
-	}
-	ASSERT(!need_alloc);
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+				&imap, &nimaps, XFS_BMAPI_ENTIRE);
 	if (error)
 		goto out_unlock;
 
-	/* for DAX, we convert unwritten extents directly */
-	if (create &&
-	    (!nimaps ||
-	     (imap.br_startblock == HOLESTARTBLOCK ||
-	      imap.br_startblock == DELAYSTARTBLOCK) ||
-	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		/*
-		 * xfs_iomap_write_direct() expects the shared lock. It
-		 * is unlocked on return.
-		 */
-		if (lockmode == XFS_ILOCK_EXCL)
-			xfs_ilock_demote(ip, lockmode);
-
-		error = xfs_iomap_write_direct(ip, offset, size,
-					       &imap, nimaps);
-		if (error)
-			return error;
-		new = 1;
-
-		trace_xfs_get_blocks_alloc(ip, offset, size,
-				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
-						   : XFS_IO_DELALLOC, &imap);
-	} else if (nimaps) {
+	if (nimaps) {
 		trace_xfs_get_blocks_found(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 						   : XFS_IO_OVERWRITE, &imap);
@@ -1393,12 +1265,6 @@ __xfs_get_blocks(
 		goto out_unlock;
 	}
 
-	if (IS_DAX(inode) && create) {
-		ASSERT(!ISUNWRITTEN(&imap));
-		/* zeroing is not needed at a higher layer */
-		new = 0;
-	}
-
 	/* trim mapping down to size requested */
 	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
@@ -1408,50 +1274,14 @@ __xfs_get_blocks(
 	 */
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK &&
-	    (create || !ISUNWRITTEN(&imap))) {
-		if (create && direct && !is_cow) {
-			error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
-					&imap);
-			if (error)
-				return error;
-		}
-
+	    !ISUNWRITTEN(&imap))
 		xfs_map_buffer(inode, bh_result, &imap, offset);
-		if (ISUNWRITTEN(&imap))
-			set_buffer_unwritten(bh_result);
-		/* direct IO needs special help */
-		if (create) {
-			if (dax_fault)
-				ASSERT(!ISUNWRITTEN(&imap));
-			else
-				xfs_map_direct(inode, bh_result, &imap, offset,
-						is_cow);
-		}
-	}
 
 	/*
 	 * If this is a realtime file, data may be on a different device.
 	 * to that pointed to from the buffer_head b_bdev currently.
 	 */
 	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
-	/*
-	 * If we previously allocated a block out beyond eof and we are now
-	 * coming back to use it then we will need to flag it as new even if it
-	 * has a disk address.
-	 *
-	 * With sub-block writes into unwritten extents we also need to mark
-	 * the buffer as new so that the unwritten parts of the buffer gets
-	 * correctly zeroed.
-	 */
-	if (create &&
-	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-	     (offset >= i_size_read(inode)) ||
-	     (new || ISUNWRITTEN(&imap))))
-		set_buffer_new(bh_result);
-
-	BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
-
 	return 0;
 
 out_unlock:
@@ -1459,110 +1289,6 @@ out_unlock:
 	xfs_iunlock(ip, lockmode);
 	return error;
 }
-int
-xfs_get_blocks(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
-}
-
-int
-xfs_get_blocks_direct(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
-}
-
-int
-xfs_get_blocks_dax_fault(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * xfs_map_direct passes us some flags in the private data to tell us what to
- * do.  If no flags are set, then the write IO is an overwrite wholly within
- * the existing allocated file size and so there is nothing for us to do.
- *
- * Note that in this case the completion can be called in interrupt context,
- * whereas if we have flags set we will always be called in task context
- * (i.e. from a workqueue).
- */
-int
-xfs_end_io_direct_write(
-	struct kiocb		*iocb,
-	loff_t			offset,
-	ssize_t			size,
-	void			*private)
-{
-	struct inode		*inode = file_inode(iocb->ki_filp);
-	struct xfs_inode	*ip = XFS_I(inode);
-	uintptr_t		flags = (uintptr_t)private;
-	int			error = 0;
-
-	trace_xfs_end_io_direct_write(ip, offset, size);
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	if (size <= 0)
-		return size;
-
-	/*
-	 * The flags tell us whether we are doing unwritten extent conversions
-	 * or an append transaction that updates the on-disk file size. These
-	 * cases are the only cases where we should *potentially* be needing
-	 * to update the VFS inode size.
-	 */
-	if (flags == 0) {
-		ASSERT(offset + size <= i_size_read(inode));
-		return 0;
-	}
-
-	/*
-	 * We need to update the in-core inode size here so that we don't end up
-	 * with the on-disk inode size being outside the in-core inode size. We
-	 * have no other method of updating EOF for AIO, so always do it here
-	 * if necessary.
-	 *
-	 * We need to lock the test/set EOF update as we can be racing with
-	 * other IO completions here to update the EOF. Failing to serialise
-	 * here can result in EOF moving backwards and Bad Things Happen when
-	 * that occurs.
-	 */
-	spin_lock(&ip->i_flags_lock);
-	if (offset + size > i_size_read(inode))
-		i_size_write(inode, offset + size);
-	spin_unlock(&ip->i_flags_lock);
-
-	if (flags & XFS_DIO_FLAG_COW)
-		error = xfs_reflink_end_cow(ip, offset, size);
-	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
-		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
-
-		error = xfs_iomap_write_unwritten(ip, offset, size);
-	}
-	if (flags & XFS_DIO_FLAG_APPEND) {
-		trace_xfs_end_io_direct_write_append(ip, offset, size);
-
-		error = xfs_setfilesize(ip, offset, size);
-	}
-
-	return error;
-}
-
 STATIC ssize_t
 xfs_vm_direct_IO(
 	struct kiocb		*iocb,
@@ -1583,7 +1309,6 @@ xfs_vm_bmap(
 	struct xfs_inode	*ip = XFS_I(inode);
 
 	trace_xfs_vm_bmap(XFS_I(inode));
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	/*
 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
@@ -1591,12 +1316,10 @@ xfs_vm_bmap(
 	 * that on reflinks inodes, so we have to skip out here.  And yes,
 	 * 0 is the magic code for a bmap error..
 	 */
-	if (xfs_is_reflink_inode(ip)) {
-		xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	if (xfs_is_reflink_inode(ip))
 		return 0;
-	}
+
 	filemap_write_and_wait(mapping);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
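As a side illustration of the two writeback hunks above (xfs_submit_ioend and xfs_chain_bio), both of which replace the open-coded "(wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0" with "REQ_OP_WRITE | wbc_to_write_flags(wbc)": the sketch below shows that flag-construction pattern in a minimal, self-contained form. Every type, macro and constant in it is a simplified stand-in chosen for illustration only, not the kernel's own definition.

/*
 * Toy illustration only: stand-in types mimicking the pattern from the
 * xfs_submit_ioend()/xfs_chain_bio() hunks above, where the sync policy
 * for a write bio is derived from the writeback control by a single
 * helper instead of being open-coded at each submission site.
 * None of these definitions are the kernel's own.
 */
#include <stdio.h>

enum writeback_sync_modes { WB_SYNC_NONE, WB_SYNC_ALL };

struct writeback_control {
	enum writeback_sync_modes sync_mode;
};

#define REQ_OP_WRITE	(1u << 0)	/* stand-in operation bit */
#define REQ_SYNC	(1u << 8)	/* stand-in "synchronous write" flag */

/* stand-in for a wbc_to_write_flags()-style helper */
static unsigned int wbc_to_write_flags(const struct writeback_control *wbc)
{
	return wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0;
}

int main(void)
{
	struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL };

	/* the post-patch style: op and flags combined in one word */
	unsigned int bi_opf = REQ_OP_WRITE | wbc_to_write_flags(&wbc);

	printf("bi_opf = %#x (sync=%s)\n", bi_opf,
	       (bi_opf & REQ_SYNC) ? "yes" : "no");
	return 0;
}

The appeal of the helper-based form is that the mapping from writeback mode to bio flags lives in one place; in the diff above, two separate submission sites had been duplicating the same ternary.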