diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
| -rw-r--r-- | fs/xfs/xfs_inode.c | 553 | 
1 files changed, 145 insertions, 408 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a098a20ca63e..2fcca4b03ed3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -37,7 +37,6 @@  #include "xfs_buf_item.h"  #include "xfs_inode_item.h"  #include "xfs_btree.h" -#include "xfs_btree_trace.h"  #include "xfs_alloc.h"  #include "xfs_ialloc.h"  #include "xfs_bmap.h" @@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone;  kmem_zone_t *xfs_inode_zone;  /* - * Used in xfs_itruncate().  This is the maximum number of extents + * Used in xfs_itruncate_extents().  This is the maximum number of extents   * freed from a file in a single transaction.   */  #define	XFS_ITRUNC_MAX_EXTENTS	2 @@ -167,7 +166,7 @@ xfs_imap_to_bp(  		dip = (xfs_dinode_t *)xfs_buf_offset(bp,  					(i << mp->m_sb.sb_inodelog)); -		di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && +		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&  			    XFS_DINODE_GOOD_VERSION(dip->di_version);  		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,  						XFS_ERRTAG_ITOBP_INOTOBP, @@ -369,7 +368,7 @@ xfs_iformat(  			/*  			 * no local regular files yet  			 */ -			if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { +			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {  				xfs_warn(ip->i_mount,  			"corrupt inode %Lu (local format for regular file).",  					(unsigned long long) ip->i_ino); @@ -802,7 +801,7 @@ xfs_iread(  	 * If we got something that isn't an inode it means someone  	 * (nfs or dmi) has a stale handle.  	 */ -	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { +	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {  #ifdef DEBUG  		xfs_alert(mp,  			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", @@ -1041,7 +1040,7 @@ xfs_ialloc(  	if (pip && XFS_INHERIT_GID(pip)) {  		ip->i_d.di_gid = pip->i_d.di_gid; -		if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { +		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {  			ip->i_d.di_mode |= S_ISGID;  		}  	} @@ -1098,14 +1097,14 @@ xfs_ialloc(  		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {  			uint	di_flags = 0; -			if ((mode & S_IFMT) == S_IFDIR) { +			if (S_ISDIR(mode)) {  				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)  					di_flags |= XFS_DIFLAG_RTINHERIT;  				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {  					di_flags |= XFS_DIFLAG_EXTSZINHERIT;  					ip->i_d.di_extsize = pip->i_d.di_extsize;  				} -			} else if ((mode & S_IFMT) == S_IFREG) { +			} else if (S_ISREG(mode)) {  				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)  					di_flags |= XFS_DIFLAG_REALTIME;  				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { @@ -1179,17 +1178,17 @@ xfs_ialloc(   * at least do it for regular files.   */  #ifdef DEBUG -void +STATIC void  xfs_isize_check( -	xfs_mount_t	*mp, -	xfs_inode_t	*ip, -	xfs_fsize_t	isize) +	struct xfs_inode	*ip, +	xfs_fsize_t		isize)  { -	xfs_fileoff_t	map_first; -	int		nimaps; -	xfs_bmbt_irec_t	imaps[2]; +	struct xfs_mount	*mp = ip->i_mount; +	xfs_fileoff_t		map_first; +	int			nimaps; +	xfs_bmbt_irec_t		imaps[2]; -	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) +	if (!S_ISREG(ip->i_d.di_mode))  		return;  	if (XFS_IS_REALTIME_INODE(ip)) @@ -1214,168 +1213,14 @@ xfs_isize_check(  	ASSERT(nimaps == 1);  	ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);  } +#else	/* DEBUG */ +#define xfs_isize_check(ip, isize)  #endif	/* DEBUG */  /* - * Calculate the last possible buffered byte in a file.  This must - * include data that was buffered beyond the EOF by the write code. - * This also needs to deal with overflowing the xfs_fsize_t type - * which can happen for sizes near the limit. - * - * We also need to take into account any blocks beyond the EOF.  It - * may be the case that they were buffered by a write which failed. - * In that case the pages will still be in memory, but the inode size - * will never have been updated. - */ -STATIC xfs_fsize_t -xfs_file_last_byte( -	xfs_inode_t	*ip) -{ -	xfs_mount_t	*mp; -	xfs_fsize_t	last_byte; -	xfs_fileoff_t	last_block; -	xfs_fileoff_t	size_last_block; -	int		error; - -	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); - -	mp = ip->i_mount; -	/* -	 * Only check for blocks beyond the EOF if the extents have -	 * been read in.  This eliminates the need for the inode lock, -	 * and it also saves us from looking when it really isn't -	 * necessary. -	 */ -	if (ip->i_df.if_flags & XFS_IFEXTENTS) { -		xfs_ilock(ip, XFS_ILOCK_SHARED); -		error = xfs_bmap_last_offset(NULL, ip, &last_block, -			XFS_DATA_FORK); -		xfs_iunlock(ip, XFS_ILOCK_SHARED); -		if (error) { -			last_block = 0; -		} -	} else { -		last_block = 0; -	} -	size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); -	last_block = XFS_FILEOFF_MAX(last_block, size_last_block); - -	last_byte = XFS_FSB_TO_B(mp, last_block); -	if (last_byte < 0) { -		return XFS_MAXIOFFSET(mp); -	} -	last_byte += (1 << mp->m_writeio_log); -	if (last_byte < 0) { -		return XFS_MAXIOFFSET(mp); -	} -	return last_byte; -} - -/* - * Start the truncation of the file to new_size.  The new size - * must be smaller than the current size.  This routine will - * clear the buffer and page caches of file data in the removed - * range, and xfs_itruncate_finish() will remove the underlying - * disk blocks. - * - * The inode must have its I/O lock locked EXCLUSIVELY, and it - * must NOT have the inode lock held at all.  This is because we're - * calling into the buffer/page cache code and we can't hold the - * inode lock when we do so. - * - * We need to wait for any direct I/Os in flight to complete before we - * proceed with the truncate. This is needed to prevent the extents - * being read or written by the direct I/Os from being removed while the - * I/O is in flight as there is no other method of synchronising - * direct I/O with the truncate operation.  Also, because we hold - * the IOLOCK in exclusive mode, we prevent new direct I/Os from being - * started until the truncate completes and drops the lock. Essentially, - * the xfs_ioend_wait() call forms an I/O barrier that provides strict - * ordering between direct I/Os and the truncate operation. - * - * The flags parameter can have either the value XFS_ITRUNC_DEFINITE - * or XFS_ITRUNC_MAYBE.  The XFS_ITRUNC_MAYBE value should be used - * in the case that the caller is locking things out of order and - * may not be able to call xfs_itruncate_finish() with the inode lock - * held without dropping the I/O lock.  If the caller must drop the - * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() - * must be called again with all the same restrictions as the initial - * call. - */ -int -xfs_itruncate_start( -	xfs_inode_t	*ip, -	uint		flags, -	xfs_fsize_t	new_size) -{ -	xfs_fsize_t	last_byte; -	xfs_off_t	toss_start; -	xfs_mount_t	*mp; -	int		error = 0; - -	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); -	ASSERT((new_size == 0) || (new_size <= ip->i_size)); -	ASSERT((flags == XFS_ITRUNC_DEFINITE) || -	       (flags == XFS_ITRUNC_MAYBE)); - -	mp = ip->i_mount; - -	/* wait for the completion of any pending DIOs */ -	if (new_size == 0 || new_size < ip->i_size) -		xfs_ioend_wait(ip); - -	/* -	 * Call toss_pages or flushinval_pages to get rid of pages -	 * overlapping the region being removed.  We have to use -	 * the less efficient flushinval_pages in the case that the -	 * caller may not be able to finish the truncate without -	 * dropping the inode's I/O lock.  Make sure -	 * to catch any pages brought in by buffers overlapping -	 * the EOF by searching out beyond the isize by our -	 * block size. We round new_size up to a block boundary -	 * so that we don't toss things on the same block as -	 * new_size but before it. -	 * -	 * Before calling toss_page or flushinval_pages, make sure to -	 * call remapf() over the same region if the file is mapped. -	 * This frees up mapped file references to the pages in the -	 * given range and for the flushinval_pages case it ensures -	 * that we get the latest mapped changes flushed out. -	 */ -	toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); -	toss_start = XFS_FSB_TO_B(mp, toss_start); -	if (toss_start < 0) { -		/* -		 * The place to start tossing is beyond our maximum -		 * file size, so there is no way that the data extended -		 * out there. -		 */ -		return 0; -	} -	last_byte = xfs_file_last_byte(ip); -	trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); -	if (last_byte > toss_start) { -		if (flags & XFS_ITRUNC_DEFINITE) { -			xfs_tosspages(ip, toss_start, -					-1, FI_REMAPF_LOCKED); -		} else { -			error = xfs_flushinval_pages(ip, toss_start, -					-1, FI_REMAPF_LOCKED); -		} -	} - -#ifdef DEBUG -	if (new_size == 0) { -		ASSERT(VN_CACHED(VFS_I(ip)) == 0); -	} -#endif -	return error; -} - -/* - * Shrink the file to the given new_size.  The new size must be smaller than - * the current size.  This will free up the underlying blocks in the removed - * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). + * Free up the underlying blocks past new_size.  The new size must be smaller + * than the current size.  This routine can be used both for the attribute and + * data fork, and does not modify the inode size, which is left to the caller.   *   * The transaction passed to this routine must have made a permanent log   * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the @@ -1387,31 +1232,6 @@ xfs_itruncate_start(   * will be "held" within the returned transaction.  This routine does NOT   * require any disk space to be reserved for it within the transaction.   * - * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it - * indicates the fork which is to be truncated.  For the attribute fork we only - * support truncation to size 0. - * - * We use the sync parameter to indicate whether or not the first transaction - * we perform might have to be synchronous.  For the attr fork, it needs to be - * so if the unlink of the inode is not yet known to be permanent in the log. - * This keeps us from freeing and reusing the blocks of the attribute fork - * before the unlink of the inode becomes permanent. - * - * For the data fork, we normally have to run synchronously if we're being - * called out of the inactive path or we're being called out of the create path - * where we're truncating an existing file.  Either way, the truncate needs to - * be sync so blocks don't reappear in the file with altered data in case of a - * crash.  wsync filesystems can run the first case async because anything that - * shrinks the inode has to run sync so by the time we're called here from - * inactive, the inode size is permanently set to 0. - * - * Calls from the truncate path always need to be sync unless we're in a wsync - * filesystem and the file has already been unlinked. - * - * The caller is responsible for correctly setting the sync parameter.  It gets - * too hard for us to guess here which path we're being called out of just - * based on inode state. - *   * If we get an error, we must return with the inode locked and linked into the   * current transaction. This keeps things simple for the higher level code,   * because it always knows that the inode is locked and held in the transaction @@ -1419,124 +1239,30 @@ xfs_itruncate_start(   * dirty on error so that transactions can be easily aborted if possible.   */  int -xfs_itruncate_finish( -	xfs_trans_t	**tp, -	xfs_inode_t	*ip, -	xfs_fsize_t	new_size, -	int		fork, -	int		sync) +xfs_itruncate_extents( +	struct xfs_trans	**tpp, +	struct xfs_inode	*ip, +	int			whichfork, +	xfs_fsize_t		new_size)  { -	xfs_fsblock_t	first_block; -	xfs_fileoff_t	first_unmap_block; -	xfs_fileoff_t	last_block; -	xfs_filblks_t	unmap_len=0; -	xfs_mount_t	*mp; -	xfs_trans_t	*ntp; -	int		done; -	int		committed; -	xfs_bmap_free_t	free_list; -	int		error; +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_trans	*tp = *tpp; +	struct xfs_trans	*ntp; +	xfs_bmap_free_t		free_list; +	xfs_fsblock_t		first_block; +	xfs_fileoff_t		first_unmap_block; +	xfs_fileoff_t		last_block; +	xfs_filblks_t		unmap_len; +	int			committed; +	int			error = 0; +	int			done = 0;  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); -	ASSERT((new_size == 0) || (new_size <= ip->i_size)); -	ASSERT(*tp != NULL); -	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); -	ASSERT(ip->i_transp == *tp); +	ASSERT(new_size <= ip->i_size); +	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);  	ASSERT(ip->i_itemp != NULL);  	ASSERT(ip->i_itemp->ili_lock_flags == 0); - - -	ntp = *tp; -	mp = (ntp)->t_mountp; -	ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); - -	/* -	 * We only support truncating the entire attribute fork. -	 */ -	if (fork == XFS_ATTR_FORK) { -		new_size = 0LL; -	} -	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); -	trace_xfs_itruncate_finish_start(ip, new_size); - -	/* -	 * The first thing we do is set the size to new_size permanently -	 * on disk.  This way we don't have to worry about anyone ever -	 * being able to look at the data being freed even in the face -	 * of a crash.  What we're getting around here is the case where -	 * we free a block, it is allocated to another file, it is written -	 * to, and then we crash.  If the new data gets written to the -	 * file but the log buffers containing the free and reallocation -	 * don't, then we'd end up with garbage in the blocks being freed. -	 * As long as we make the new_size permanent before actually -	 * freeing any blocks it doesn't matter if they get written to. -	 * -	 * The callers must signal into us whether or not the size -	 * setting here must be synchronous.  There are a few cases -	 * where it doesn't have to be synchronous.  Those cases -	 * occur if the file is unlinked and we know the unlink is -	 * permanent or if the blocks being truncated are guaranteed -	 * to be beyond the inode eof (regardless of the link count) -	 * and the eof value is permanent.  Both of these cases occur -	 * only on wsync-mounted filesystems.  In those cases, we're -	 * guaranteed that no user will ever see the data in the blocks -	 * that are being truncated so the truncate can run async. -	 * In the free beyond eof case, the file may wind up with -	 * more blocks allocated to it than it needs if we crash -	 * and that won't get fixed until the next time the file -	 * is re-opened and closed but that's ok as that shouldn't -	 * be too many blocks. -	 * -	 * However, we can't just make all wsync xactions run async -	 * because there's one call out of the create path that needs -	 * to run sync where it's truncating an existing file to size -	 * 0 whose size is > 0. -	 * -	 * It's probably possible to come up with a test in this -	 * routine that would correctly distinguish all the above -	 * cases from the values of the function parameters and the -	 * inode state but for sanity's sake, I've decided to let the -	 * layers above just tell us.  It's simpler to correctly figure -	 * out in the layer above exactly under what conditions we -	 * can run async and I think it's easier for others read and -	 * follow the logic in case something has to be changed. -	 * cscope is your friend -- rcc. -	 * -	 * The attribute fork is much simpler. -	 * -	 * For the attribute fork we allow the caller to tell us whether -	 * the unlink of the inode that led to this call is yet permanent -	 * in the on disk log.  If it is not and we will be freeing extents -	 * in this inode then we make the first transaction synchronous -	 * to make sure that the unlink is permanent by the time we free -	 * the blocks. -	 */ -	if (fork == XFS_DATA_FORK) { -		if (ip->i_d.di_nextents > 0) { -			/* -			 * If we are not changing the file size then do -			 * not update the on-disk file size - we may be -			 * called from xfs_inactive_free_eofblocks().  If we -			 * update the on-disk file size and then the system -			 * crashes before the contents of the file are -			 * flushed to disk then the files may be full of -			 * holes (ie NULL files bug). -			 */ -			if (ip->i_size != new_size) { -				ip->i_d.di_size = new_size; -				ip->i_size = new_size; -				xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); -			} -		} -	} else if (sync) { -		ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); -		if (ip->i_d.di_anextents > 0) -			xfs_trans_set_sync(ntp); -	} -	ASSERT(fork == XFS_DATA_FORK || -		(fork == XFS_ATTR_FORK && -			((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || -			 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); +	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));  	/*  	 * Since it is possible for space to become allocated beyond @@ -1547,128 +1273,142 @@ xfs_itruncate_finish(  	 * beyond the maximum file size (ie it is the same as last_block),  	 * then there is nothing to do.  	 */ +	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);  	last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); -	ASSERT(first_unmap_block <= last_block); -	done = 0; -	if (last_block == first_unmap_block) { -		done = 1; -	} else { -		unmap_len = last_block - first_unmap_block + 1; -	} +	if (first_unmap_block == last_block) +		return 0; + +	ASSERT(first_unmap_block < last_block); +	unmap_len = last_block - first_unmap_block + 1;  	while (!done) { -		/* -		 * Free up up to XFS_ITRUNC_MAX_EXTENTS.  xfs_bunmapi() -		 * will tell us whether it freed the entire range or -		 * not.  If this is a synchronous mount (wsync), -		 * then we can tell bunmapi to keep all the -		 * transactions asynchronous since the unlink -		 * transaction that made this inode inactive has -		 * already hit the disk.  There's no danger of -		 * the freed blocks being reused, there being a -		 * crash, and the reused blocks suddenly reappearing -		 * in this file with garbage in them once recovery -		 * runs. -		 */  		xfs_bmap_init(&free_list, &first_block); -		error = xfs_bunmapi(ntp, ip, +		error = xfs_bunmapi(tp, ip,  				    first_unmap_block, unmap_len, -				    xfs_bmapi_aflag(fork), +				    xfs_bmapi_aflag(whichfork),  				    XFS_ITRUNC_MAX_EXTENTS,  				    &first_block, &free_list,  				    &done); -		if (error) { -			/* -			 * If the bunmapi call encounters an error, -			 * return to the caller where the transaction -			 * can be properly aborted.  We just need to -			 * make sure we're not holding any resources -			 * that we were not when we came in. -			 */ -			xfs_bmap_cancel(&free_list); -			return error; -		} +		if (error) +			goto out_bmap_cancel;  		/*  		 * Duplicate the transaction that has the permanent  		 * reservation and commit the old transaction.  		 */ -		error = xfs_bmap_finish(tp, &free_list, &committed); -		ntp = *tp; +		error = xfs_bmap_finish(&tp, &free_list, &committed);  		if (committed) -			xfs_trans_ijoin(ntp, ip); - -		if (error) { -			/* -			 * If the bmap finish call encounters an error, return -			 * to the caller where the transaction can be properly -			 * aborted.  We just need to make sure we're not -			 * holding any resources that we were not when we came -			 * in. -			 * -			 * Aborting from this point might lose some blocks in -			 * the file system, but oh well. -			 */ -			xfs_bmap_cancel(&free_list); -			return error; -		} +			xfs_trans_ijoin(tp, ip); +		if (error) +			goto out_bmap_cancel;  		if (committed) {  			/*  			 * Mark the inode dirty so it will be logged and  			 * moved forward in the log as part of every commit.  			 */ -			xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); +			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);  		} -		ntp = xfs_trans_dup(ntp); -		error = xfs_trans_commit(*tp, 0); -		*tp = ntp; +		ntp = xfs_trans_dup(tp); +		error = xfs_trans_commit(tp, 0); +		tp = ntp; -		xfs_trans_ijoin(ntp, ip); +		xfs_trans_ijoin(tp, ip);  		if (error) -			return error; +			goto out; +  		/* -		 * transaction commit worked ok so we can drop the extra ticket +		 * Transaction commit worked ok so we can drop the extra ticket  		 * reference that we gained in xfs_trans_dup()  		 */ -		xfs_log_ticket_put(ntp->t_ticket); -		error = xfs_trans_reserve(ntp, 0, +		xfs_log_ticket_put(tp->t_ticket); +		error = xfs_trans_reserve(tp, 0,  					XFS_ITRUNCATE_LOG_RES(mp), 0,  					XFS_TRANS_PERM_LOG_RES,  					XFS_ITRUNCATE_LOG_COUNT);  		if (error) -			return error; +			goto out;  	} + +out: +	*tpp = tp; +	return error; +out_bmap_cancel:  	/* -	 * Only update the size in the case of the data fork, but -	 * always re-log the inode so that our permanent transaction -	 * can keep on rolling it forward in the log. +	 * If the bunmapi call encounters an error, return to the caller where +	 * the transaction can be properly aborted.  We just need to make sure +	 * we're not holding any resources that we were not when we came in.  	 */ -	if (fork == XFS_DATA_FORK) { -		xfs_isize_check(mp, ip, new_size); +	xfs_bmap_cancel(&free_list); +	goto out; +} + +int +xfs_itruncate_data( +	struct xfs_trans	**tpp, +	struct xfs_inode	*ip, +	xfs_fsize_t		new_size) +{ +	int			error; + +	trace_xfs_itruncate_data_start(ip, new_size); + +	/* +	 * The first thing we do is set the size to new_size permanently on +	 * disk.  This way we don't have to worry about anyone ever being able +	 * to look at the data being freed even in the face of a crash. +	 * What we're getting around here is the case where we free a block, it +	 * is allocated to another file, it is written to, and then we crash. +	 * If the new data gets written to the file but the log buffers +	 * containing the free and reallocation don't, then we'd end up with +	 * garbage in the blocks being freed.  As long as we make the new_size +	 * permanent before actually freeing any blocks it doesn't matter if +	 * they get written to. +	 */ +	if (ip->i_d.di_nextents > 0) {  		/* -		 * If we are not changing the file size then do -		 * not update the on-disk file size - we may be -		 * called from xfs_inactive_free_eofblocks().  If we -		 * update the on-disk file size and then the system -		 * crashes before the contents of the file are -		 * flushed to disk then the files may be full of -		 * holes (ie NULL files bug). +		 * If we are not changing the file size then do not update +		 * the on-disk file size - we may be called from +		 * xfs_inactive_free_eofblocks().  If we update the on-disk +		 * file size and then the system crashes before the contents +		 * of the file are flushed to disk then the files may be +		 * full of holes (ie NULL files bug).  		 */  		if (ip->i_size != new_size) {  			ip->i_d.di_size = new_size;  			ip->i_size = new_size; +			xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);  		}  	} -	xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); -	ASSERT((new_size != 0) || -	       (fork == XFS_ATTR_FORK) || -	       (ip->i_delayed_blks == 0)); -	ASSERT((new_size != 0) || -	       (fork == XFS_ATTR_FORK) || -	       (ip->i_d.di_nextents == 0)); -	trace_xfs_itruncate_finish_end(ip, new_size); + +	error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); +	if (error) +		return error; + +	/* +	 * If we are not changing the file size then do not update the on-disk +	 * file size - we may be called from xfs_inactive_free_eofblocks(). +	 * If we update the on-disk file size and then the system crashes +	 * before the contents of the file are flushed to disk then the files +	 * may be full of holes (ie NULL files bug). +	 */ +	xfs_isize_check(ip, new_size); +	if (ip->i_size != new_size) { +		ip->i_d.di_size = new_size; +		ip->i_size = new_size; +	} + +	ASSERT(new_size != 0 || ip->i_delayed_blks == 0); +	ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); + +	/* +	 * Always re-log the inode so that our permanent transaction can keep +	 * on rolling it forward in the log. +	 */ +	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); + +	trace_xfs_itruncate_data_end(ip, new_size);  	return 0;  } @@ -1694,7 +1434,6 @@ xfs_iunlink(  	ASSERT(ip->i_d.di_nlink == 0);  	ASSERT(ip->i_d.di_mode != 0); -	ASSERT(ip->i_transp == tp);  	mp = tp->t_mountp; @@ -1717,7 +1456,7 @@ xfs_iunlink(  	ASSERT(agi->agi_unlinked[bucket_index]);  	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); -	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { +	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {  		/*  		 * There is already another inode in the bucket we need  		 * to add ourselves to.  Add us at the front of the list. @@ -1728,8 +1467,7 @@ xfs_iunlink(  		if (error)  			return error; -		ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); -		/* both on-disk, don't endian flip twice */ +		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));  		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];  		offset = ip->i_imap.im_boffset +  			offsetof(xfs_dinode_t, di_next_unlinked); @@ -1794,7 +1532,7 @@ xfs_iunlink_remove(  	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);  	ASSERT(agino != 0);  	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; -	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); +	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));  	ASSERT(agi->agi_unlinked[bucket_index]);  	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { @@ -1959,7 +1697,7 @@ xfs_ifree_cluster(  		 * stale first, we will not attempt to lock them in the loop  		 * below as the XFS_ISTALE flag will be set.  		 */ -		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); +		lip = bp->b_fspriv;  		while (lip) {  			if (lip->li_type == XFS_LI_INODE) {  				iip = (xfs_inode_log_item_t *)lip; @@ -2086,12 +1824,11 @@ xfs_ifree(  	xfs_buf_t       	*ibp;  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	ASSERT(ip->i_transp == tp);  	ASSERT(ip->i_d.di_nlink == 0);  	ASSERT(ip->i_d.di_nextents == 0);  	ASSERT(ip->i_d.di_anextents == 0);  	ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || -	       ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); +	       (!S_ISREG(ip->i_d.di_mode)));  	ASSERT(ip->i_d.di_nblocks == 0);  	/* @@ -2733,7 +2470,7 @@ cluster_corrupt_out:  		 * mark the buffer as an error and call them.  Otherwise  		 * mark it as stale and brelse.  		 */ -		if (XFS_BUF_IODONE_FUNC(bp)) { +		if (bp->b_iodone) {  			XFS_BUF_UNDONE(bp);  			XFS_BUF_STALE(bp);  			XFS_BUF_ERROR(bp,EIO); @@ -2920,7 +2657,7 @@ xfs_iflush_int(  	 */  	xfs_synchronize_times(ip); -	if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, +	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),  			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {  		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,  			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", @@ -2934,7 +2671,7 @@ xfs_iflush_int(  			__func__, ip->i_ino, ip, ip->i_d.di_magic);  		goto corrupt_out;  	} -	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { +	if (S_ISREG(ip->i_d.di_mode)) {  		if (XFS_TEST_ERROR(  		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&  		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), @@ -2944,7 +2681,7 @@ xfs_iflush_int(  				__func__, ip->i_ino, ip);  			goto corrupt_out;  		} -	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { +	} else if (S_ISDIR(ip->i_d.di_mode)) {  		if (XFS_TEST_ERROR(  		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&  		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && @@ -3073,8 +2810,8 @@ xfs_iflush_int(  		 */  		xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); -		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); -		ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); +		ASSERT(bp->b_fspriv != NULL); +		ASSERT(bp->b_iodone != NULL);  	} else {  		/*  		 * We're flushing an inode which is not in the AIL and has  |