diff options
Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
| -rw-r--r-- | fs/xfs/xfs_bmap_util.c | 194 | 
1 files changed, 105 insertions, 89 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 64731ef3324d..1707980f9a4b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -133,7 +133,7 @@ xfs_bmap_finish(  			mp = ntp->t_mountp;  			if (!XFS_FORCED_SHUTDOWN(mp))  				xfs_force_shutdown(mp, -						   (error == EFSCORRUPTED) ? +						   (error == -EFSCORRUPTED) ?  						   SHUTDOWN_CORRUPT_INCORE :  						   SHUTDOWN_META_IO_ERROR);  			return error; @@ -365,7 +365,7 @@ xfs_bmap_count_tree(  			xfs_trans_brelse(tp, bp);  			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",  					 XFS_ERRLEVEL_LOW, mp); -			return XFS_ERROR(EFSCORRUPTED); +			return -EFSCORRUPTED;  		}  		xfs_trans_brelse(tp, bp);  	} else { @@ -425,14 +425,14 @@ xfs_bmap_count_blocks(  	ASSERT(level > 0);  	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);  	bno = be64_to_cpu(*pp); -	ASSERT(bno != NULLDFSBNO); +	ASSERT(bno != NULLFSBLOCK);  	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);  	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);  	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {  		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,  				 mp); -		return XFS_ERROR(EFSCORRUPTED); +		return -EFSCORRUPTED;  	}  	return 0; @@ -524,13 +524,13 @@ xfs_getbmap(  			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&  			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&  			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) -				return XFS_ERROR(EINVAL); +				return -EINVAL;  		} else if (unlikely(  			   ip->i_d.di_aformat != 0 &&  			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {  			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,  					 ip->i_mount); -			return XFS_ERROR(EFSCORRUPTED); +			return -EFSCORRUPTED;  		}  		prealloced = 0; @@ -539,7 +539,7 @@ xfs_getbmap(  		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&  		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&  		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) -			return XFS_ERROR(EINVAL); +			return -EINVAL;  		if (xfs_get_extsz_hint(ip) ||  		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ @@ -559,26 +559,26 @@ xfs_getbmap(  		bmv->bmv_entries = 0;  		return 0;  	} else if (bmv->bmv_length < 0) { -		return XFS_ERROR(EINVAL); +		return -EINVAL;  	}  	nex = bmv->bmv_count - 1;  	if (nex <= 0) -		return XFS_ERROR(EINVAL); +		return -EINVAL;  	bmvend = bmv->bmv_offset + bmv->bmv_length;  	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) -		return XFS_ERROR(ENOMEM); +		return -ENOMEM;  	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);  	if (!out) -		return XFS_ERROR(ENOMEM); +		return -ENOMEM;  	xfs_ilock(ip, XFS_IOLOCK_SHARED);  	if (whichfork == XFS_DATA_FORK) {  		if (!(iflags & BMV_IF_DELALLOC) &&  		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { -			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); +			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);  			if (error)  				goto out_unlock_iolock; @@ -611,7 +611,7 @@ xfs_getbmap(  	/*  	 * Allocate enough space to handle "subnex" maps at a time.  	 */ -	error = ENOMEM; +	error = -ENOMEM;  	subnex = 16;  	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);  	if (!map) @@ -809,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)  	 * have speculative prealloc/delalloc blocks to remove.  	 */  	if (VFS_I(ip)->i_size == 0 && -	    VN_CACHED(VFS_I(ip)) == 0 && +	    VFS_I(ip)->i_mapping->nrpages == 0 &&  	    ip->i_delayed_blks == 0)  		return false; @@ -882,7 +882,7 @@ xfs_free_eofblocks(  		if (need_iolock) {  			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {  				xfs_trans_cancel(tp, 0); -				return EAGAIN; +				return -EAGAIN;  			}  		} @@ -955,14 +955,14 @@ xfs_alloc_file_space(  	trace_xfs_alloc_file_space(ip);  	if (XFS_FORCED_SHUTDOWN(mp)) -		return XFS_ERROR(EIO); +		return -EIO;  	error = xfs_qm_dqattach(ip, 0);  	if (error)  		return error;  	if (len <= 0) -		return XFS_ERROR(EINVAL); +		return -EINVAL;  	rt = XFS_IS_REALTIME_INODE(ip);  	extsz = xfs_get_extsz_hint(ip); @@ -1028,7 +1028,7 @@ xfs_alloc_file_space(  			/*  			 * Free the transaction structure.  			 */ -			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); +			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));  			xfs_trans_cancel(tp, 0);  			break;  		} @@ -1065,7 +1065,7 @@ xfs_alloc_file_space(  		allocated_fsb = imapp->br_blockcount;  		if (nimaps == 0) { -			error = XFS_ERROR(ENOSPC); +			error = -ENOSPC;  			break;  		} @@ -1126,7 +1126,7 @@ xfs_zero_remaining_bytes(  					mp->m_rtdev_targp : mp->m_ddev_targp,  				  BTOBB(mp->m_sb.sb_blocksize), 0);  	if (!bp) -		return XFS_ERROR(ENOMEM); +		return -ENOMEM;  	xfs_buf_unlock(bp); @@ -1158,7 +1158,7 @@ xfs_zero_remaining_bytes(  		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));  		if (XFS_FORCED_SHUTDOWN(mp)) { -			error = XFS_ERROR(EIO); +			error = -EIO;  			break;  		}  		xfs_buf_iorequest(bp); @@ -1176,7 +1176,7 @@ xfs_zero_remaining_bytes(  		XFS_BUF_WRITE(bp);  		if (XFS_FORCED_SHUTDOWN(mp)) { -			error = XFS_ERROR(EIO); +			error = -EIO;  			break;  		}  		xfs_buf_iorequest(bp); @@ -1234,7 +1234,7 @@ xfs_free_file_space(  	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);  	ioffset = offset & ~(rounding - 1); -	error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, +	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,  					      ioffset, -1);  	if (error)  		goto out; @@ -1315,7 +1315,7 @@ xfs_free_file_space(  			/*  			 * Free the transaction structure.  			 */ -			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); +			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));  			xfs_trans_cancel(tp, 0);  			break;  		} @@ -1470,6 +1470,26 @@ xfs_collapse_file_space(  	start_fsb = XFS_B_TO_FSB(mp, offset + len);  	shift_fsb = XFS_B_TO_FSB(mp, len); +	/* +	 * Writeback the entire file and force remove any post-eof blocks. The +	 * writeback prevents changes to the extent list via concurrent +	 * writeback and the eofblocks trim prevents the extent shift algorithm +	 * from running into a post-eof delalloc extent. +	 * +	 * XXX: This is a temporary fix until the extent shift loop below is +	 * converted to use offsets and lookups within the ILOCK rather than +	 * carrying around the index into the extent list for the next +	 * iteration. +	 */ +	error = filemap_write_and_wait(VFS_I(ip)->i_mapping); +	if (error) +		return error; +	if (xfs_can_free_eofblocks(ip, true)) { +		error = xfs_free_eofblocks(mp, ip, false); +		if (error) +			return error; +	} +  	error = xfs_free_file_space(ip, offset, len);  	if (error)  		return error; @@ -1557,14 +1577,14 @@ xfs_swap_extents_check_format(  	/* Should never get a local format */  	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||  	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		return EINVAL; +		return -EINVAL;  	/*  	 * if the target inode has less extents that then temporary inode then  	 * why did userspace call us?  	 */  	if (ip->i_d.di_nextents < tip->i_d.di_nextents) -		return EINVAL; +		return -EINVAL;  	/*  	 * if the target inode is in extent form and the temp inode is in btree @@ -1573,19 +1593,19 @@ xfs_swap_extents_check_format(  	 */  	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&  	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE) -		return EINVAL; +		return -EINVAL;  	/* Check temp in extent form to max in target */  	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&  	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >  			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) -		return EINVAL; +		return -EINVAL;  	/* Check target in extent form to max in temp */  	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&  	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >  			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) -		return EINVAL; +		return -EINVAL;  	/*  	 * If we are in a btree format, check that the temp root block will fit @@ -1599,26 +1619,50 @@ xfs_swap_extents_check_format(  	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {  		if (XFS_IFORK_BOFF(ip) &&  		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) -			return EINVAL; +			return -EINVAL;  		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=  		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) -			return EINVAL; +			return -EINVAL;  	}  	/* Reciprocal target->temp btree format checks */  	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {  		if (XFS_IFORK_BOFF(tip) &&  		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) -			return EINVAL; +			return -EINVAL;  		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=  		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) -			return EINVAL; +			return -EINVAL;  	}  	return 0;  }  int +xfs_swap_extent_flush( +	struct xfs_inode	*ip) +{ +	int	error; + +	error = filemap_write_and_wait(VFS_I(ip)->i_mapping); +	if (error) +		return error; +	truncate_pagecache_range(VFS_I(ip), 0, -1); + +	/* Verify O_DIRECT for ftmp */ +	if (VFS_I(ip)->i_mapping->nrpages) +		return -EINVAL; + +	/* +	 * Don't try to swap extents on mmap()d files because we can't lock +	 * out races against page faults safely. +	 */ +	if (mapping_mapped(VFS_I(ip)->i_mapping)) +		return -EBUSY; +	return 0; +} + +int  xfs_swap_extents(  	xfs_inode_t	*ip,	/* target inode */  	xfs_inode_t	*tip,	/* tmp inode */ @@ -1633,51 +1677,57 @@ xfs_swap_extents(  	int		aforkblks = 0;  	int		taforkblks = 0;  	__uint64_t	tmp; +	int		lock_flags;  	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);  	if (!tempifp) { -		error = XFS_ERROR(ENOMEM); +		error = -ENOMEM;  		goto out;  	}  	/* -	 * we have to do two separate lock calls here to keep lockdep -	 * happy. If we try to get all the locks in one call, lock will -	 * report false positives when we drop the ILOCK and regain them -	 * below. +	 * Lock up the inodes against other IO and truncate to begin with. +	 * Then we can ensure the inodes are flushed and have no page cache +	 * safely. Once we have done this we can take the ilocks and do the rest +	 * of the checks.  	 */ +	lock_flags = XFS_IOLOCK_EXCL;  	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); -	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);  	/* Verify that both files have the same format */  	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { -		error = XFS_ERROR(EINVAL); +		error = -EINVAL;  		goto out_unlock;  	}  	/* Verify both files are either real-time or non-realtime */  	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { -		error = XFS_ERROR(EINVAL); +		error = -EINVAL;  		goto out_unlock;  	} -	error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); +	error = xfs_swap_extent_flush(ip); +	if (error) +		goto out_unlock; +	error = xfs_swap_extent_flush(tip);  	if (error)  		goto out_unlock; -	truncate_pagecache_range(VFS_I(tip), 0, -1); -	/* Verify O_DIRECT for ftmp */ -	if (VN_CACHED(VFS_I(tip)) != 0) { -		error = XFS_ERROR(EINVAL); +	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); +	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); +	if (error) { +		xfs_trans_cancel(tp, 0);  		goto out_unlock;  	} +	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); +	lock_flags |= XFS_ILOCK_EXCL;  	/* Verify all data are being swapped */  	if (sxp->sx_offset != 0 ||  	    sxp->sx_length != ip->i_d.di_size ||  	    sxp->sx_length != tip->i_d.di_size) { -		error = XFS_ERROR(EFAULT); -		goto out_unlock; +		error = -EFAULT; +		goto out_trans_cancel;  	}  	trace_xfs_swap_extent_before(ip, 0); @@ -1689,7 +1739,7 @@ xfs_swap_extents(  		xfs_notice(mp,  		    "%s: inode 0x%llx format is incompatible for exchanging.",  				__func__, ip->i_ino); -		goto out_unlock; +		goto out_trans_cancel;  	}  	/* @@ -1703,43 +1753,9 @@ xfs_swap_extents(  	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||  	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||  	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { -		error = XFS_ERROR(EBUSY); -		goto out_unlock; +		error = -EBUSY; +		goto out_trans_cancel;  	} - -	/* We need to fail if the file is memory mapped.  Once we have tossed -	 * all existing pages, the page fault will have no option -	 * but to go to the filesystem for pages. By making the page fault call -	 * vop_read (or write in the case of autogrow) they block on the iolock -	 * until we have switched the extents. -	 */ -	if (VN_MAPPED(VFS_I(ip))) { -		error = XFS_ERROR(EBUSY); -		goto out_unlock; -	} - -	xfs_iunlock(ip, XFS_ILOCK_EXCL); -	xfs_iunlock(tip, XFS_ILOCK_EXCL); - -	/* -	 * There is a race condition here since we gave up the -	 * ilock.  However, the data fork will not change since -	 * we have the iolock (locked for truncation too) so we -	 * are safe.  We don't really care if non-io related -	 * fields change. -	 */ -	truncate_pagecache_range(VFS_I(ip), 0, -1); - -	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); -	if (error) { -		xfs_iunlock(ip,  XFS_IOLOCK_EXCL); -		xfs_iunlock(tip, XFS_IOLOCK_EXCL); -		xfs_trans_cancel(tp, 0); -		goto out; -	} -	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); -  	/*  	 * Count the number of extended attribute blocks  	 */ @@ -1757,8 +1773,8 @@ xfs_swap_extents(  			goto out_trans_cancel;  	} -	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); +	xfs_trans_ijoin(tp, ip, lock_flags); +	xfs_trans_ijoin(tp, tip, lock_flags);  	/*  	 * Before we've swapped the forks, lets set the owners of the forks @@ -1887,8 +1903,8 @@ out:  	return error;  out_unlock: -	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); +	xfs_iunlock(ip, lock_flags); +	xfs_iunlock(tip, lock_flags);  	goto out;  out_trans_cancel:  |