From a36b926180cda375ac2ec89e1748b47137cfc51c Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 27 Jan 2017 23:22:55 -0800 Subject: xfs: pull up iolock from xfs_free_eofblocks() xfs_free_eofblocks() requires the IOLOCK_EXCL lock, but is called from different contexts where the lock may or may not be held. The need_iolock parameter exists for this reason, to indicate whether xfs_free_eofblocks() must acquire the iolock itself before it can proceed. This is ugly and confusing. Simplify the semantics of xfs_free_eofblocks() to require the caller to acquire the iolock appropriately and kill the need_iolock parameter. While here, the mp param can be removed as well as the xfs_mount is accessible from the xfs_inode structure. This patch does not change behavior. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'fs/xfs/xfs_bmap_util.c') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c1417919ab0a..9319ee9759d4 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) */ int xfs_free_eofblocks( - xfs_mount_t *mp, - xfs_inode_t *ip, - bool need_iolock) + struct xfs_inode *ip) { - xfs_trans_t *tp; - int error; - xfs_fileoff_t end_fsb; - xfs_fileoff_t last_fsb; - xfs_filblks_t map_len; - int nimaps; - xfs_bmbt_irec_t imap; + struct xfs_trans *tp; + int error; + xfs_fileoff_t end_fsb; + xfs_fileoff_t last_fsb; + xfs_filblks_t map_len; + int nimaps; + struct xfs_bmbt_irec imap; + struct xfs_mount *mp = ip->i_mount; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); /* * Figure out if there are any blocks beyond the end @@ -944,6 +945,10 @@ xfs_free_eofblocks( error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); + /* + * If there are blocks after the end of file, truncate the file to its + * current size to free them up. + */ if (!error && (nimaps != 0) && (imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks)) { @@ -954,22 +959,10 @@ xfs_free_eofblocks( if (error) return error; - /* - * There are blocks after the end of file. - * Free them up now by truncating the file to - * its current size. - */ - if (need_iolock) { - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) - return -EAGAIN; - } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -997,8 +990,6 @@ xfs_free_eofblocks( } xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); } return error; } @@ -1415,7 +1406,7 @@ xfs_shift_file_space( * into the accessible region of the file. */ if (xfs_can_free_eofblocks(ip, true)) { - error = xfs_free_eofblocks(mp, ip, false); + error = xfs_free_eofblocks(ip); if (error) return error; } -- cgit From e4229d6b0bc9280f29624faf170cf76a9f1ca60e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 27 Jan 2017 23:22:57 -0800 Subject: xfs: fix eofblocks race with file extending async dio writes It's possible for post-eof blocks to end up being used for direct I/O writes. dio write performs an upfront unwritten extent allocation, sends the dio and then updates the inode size (if necessary) on write completion. If a file release occurs while a file extending dio write is in flight, it is possible to mistake the post-eof blocks for speculative preallocation and incorrectly truncate them from the inode. This means that the resulting dio write completion can discover a hole and allocate new blocks rather than perform unwritten extent conversion. This requires a strange mix of I/O and is thus not likely to reproduce in real world workloads. It is intermittently reproduced by generic/299. The error manifests as an assert failure due to transaction overrun because the aforementioned write completion transaction has only reserved enough blocks for btree operations: XFS: Assertion failed: tp->t_blk_res_used <= tp->t_blk_res, \ file: fs/xfs//xfs_trans.c, line: 309 The root cause is that xfs_free_eofblocks() uses i_size to truncate post-eof blocks from the inode, but async, file extending direct writes do not update i_size until write completion, long after inode locks are dropped. Therefore, xfs_free_eofblocks() effectively truncates the inode to the incorrect size. Update xfs_free_eofblocks() to serialize against dio similar to how extending writes are serialized against i_size updates before post-eof block zeroing. Specifically, wait on dio while under the iolock. This ensures that dio write completions have updated i_size before post-eof blocks are processed. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/xfs/xfs_bmap_util.c') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 9319ee9759d4..eb890ed1ed5c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -959,6 +959,9 @@ xfs_free_eofblocks( if (error) return error; + /* wait on dio to ensure i_size has settled */ + inode_dio_wait(VFS_I(ip)); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { -- cgit From 1dbba0863468f509f3fccb498b34f1b1e24356d9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 27 Jan 2017 23:24:28 -0800 Subject: xfs: remove unused full argument from bmap The "full" argument was used only by the fiemap formatter, which is now gone with the iomap updates. Remove the unused arg. Signed-off-by: Eric Sandeen Reviewed-by: Alex Elder Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 6 ++---- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_ioctl.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/xfs/xfs_bmap_util.c') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index eb890ed1ed5c..7c3bfafffba8 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -787,11 +787,9 @@ xfs_getbmap( xfs_iunlock(ip, XFS_IOLOCK_SHARED); for (i = 0; i < cur_ext; i++) { - int full = 0; /* user array is full */ - /* format results & advance arg */ - error = formatter(&arg, &out[i], &full); - if (error || full) + error = formatter(&arg, &out[i]); + if (error) break; } diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index f1005393785c..135d8267e284 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -35,7 +35,7 @@ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, xfs_fileoff_t start_fsb, xfs_fileoff_t length); /* bmap to userspace formatter - copy to user & advance pointer */ -typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); +typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *); int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, xfs_bmap_format_t formatter, void *arg); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c67cfb451fd3..cf1363dbf32b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1524,7 +1524,7 @@ out_drop_write: } STATIC int -xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +xfs_getbmap_format(void **ap, struct getbmapx *bmv) { struct getbmap __user *base = (struct getbmap __user *)*ap; @@ -1567,7 +1567,7 @@ xfs_ioc_getbmap( } STATIC int -xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +xfs_getbmapx_format(void **ap, struct getbmapx *bmv) { struct getbmapx __user *base = (struct getbmapx __user *)*ap; -- cgit From 48af96ab92bc68fb645068b978ce36df2379e076 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 15 Feb 2017 10:18:10 -0800 Subject: xfs: don't reserve blocks for right shift transactions The block reservation for the transaction allocated in xfs_shift_file_space() is an artifact of the original collapse range support. It exists to handle the case where a collapse range occurs, the initial extent is left shifted into a location that forms a contiguous boundary with the previous extent and thus the extents are merged. This code was subsequently refactored and reused for insert range (right shift) support. If an insert range occurs under low free space conditions, the extent at the starting offset is split before the first shift transaction is allocated. If the block reservation fails, this leaves separate, but contiguous extents around in the inode. While not a fatal problem, this is unexpected and will flag a warning on subsequent insert range operations on the inode. This problem has been reproduce intermittently by generic/270 running against a ramdisk device. Since right shift does not create new extent boundaries in the inode, a block reservation for extent merge is unnecessary. Update xfs_shift_file_space() to conditionally reserve fs blocks for left shift transactions only. This avoids the warning reproduced by generic/270. Reported-by: Ross Zwisler Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/xfs/xfs_bmap_util.c') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 7c3bfafffba8..6be5f26783a1 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1385,10 +1385,16 @@ xfs_shift_file_space( xfs_fileoff_t stop_fsb; xfs_fileoff_t next_fsb; xfs_fileoff_t shift_fsb; + uint resblks; ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); if (direction == SHIFT_LEFT) { + /* + * Reserve blocks to cover potential extent merges after left + * shift operations. + */ + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); next_fsb = XFS_B_TO_FSB(mp, offset + len); stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); } else { @@ -1396,6 +1402,7 @@ xfs_shift_file_space( * If right shift, delegate the work of initialization of * next_fsb to xfs_bmap_shift_extent as it has ilock held. */ + resblks = 0; next_fsb = NULLFSBLOCK; stop_fsb = XFS_B_TO_FSB(mp, offset); } @@ -1437,21 +1444,14 @@ xfs_shift_file_space( } while (!error && !done) { - /* - * We would need to reserve permanent block for transaction. - * This will come into picture when after shifting extent into - * hole we found that adjacent extents can be merged which - * may lead to freeing of a block during record update. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, + &tp); if (error) break; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, - ip->i_gdquot, ip->i_pdquot, - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, + ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto out_trans_cancel; -- cgit From 089ec2f87578b9740f0c27bcea9cc6be59c1ddb0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Feb 2017 08:21:06 -0800 Subject: xfs: simplify xfs_rtallocate_extent We can deduce the allocation type from the bno argument, and do the return without prod much simpler internally. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong [darrick: fix the macro for the non-rt build] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 13 ++++--------- fs/xfs/xfs_rtalloc.c | 24 ++++++++---------------- fs/xfs/xfs_rtalloc.h | 3 +-- 3 files changed, 13 insertions(+), 27 deletions(-) (limited to 'fs/xfs/xfs_bmap_util.c') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 6be5f26783a1..8b75dcea5966 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -88,7 +88,6 @@ int xfs_bmap_rtalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - xfs_alloctype_t atype = 0; /* type for allocation routines */ int error; /* error return value */ xfs_mount_t *mp; /* mount point structure */ xfs_extlen_t prod = 0; /* product factor for allocators */ @@ -155,18 +154,14 @@ xfs_bmap_rtalloc( /* * Realtime allocation, done through xfs_rtallocate_extent. */ - atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; do_div(ap->blkno, mp->m_sb.sb_rextsize); rtb = ap->blkno; ap->length = ralen; - if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, - &ralen, atype, ap->wasdel, prod, &rtb))) - return error; - if (rtb == NULLFSBLOCK && prod > 1 && - (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, - ap->length, &ralen, atype, - ap->wasdel, 1, &rtb))) + error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, + &ralen, ap->wasdel, prod, &rtb); + if (error) return error; + ap->blkno = rtb; if (ap->blkno != NULLFSBLOCK) { ap->blkno *= mp->m_sb.sb_rextsize; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 802bcc326d9f..c57aa7f18087 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1093,7 +1093,6 @@ xfs_rtallocate_extent( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */ int wasdel, /* was a delayed allocation extent */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -1123,27 +1122,16 @@ xfs_rtallocate_extent( } } +retry: sumbp = NULL; - /* - * Allocate by size, or near another block, or exactly at some block. - */ - switch (type) { - case XFS_ALLOCTYPE_ANY_AG: + if (bno == 0) { error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len, &sumbp, &sb, prod, &r); - break; - case XFS_ALLOCTYPE_NEAR_BNO: + } else { error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen, len, &sumbp, &sb, prod, &r); - break; - case XFS_ALLOCTYPE_THIS_BNO: - error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, - len, &sumbp, &sb, prod, &r); - break; - default: - error = -EIO; - ASSERT(0); } + if (error) return error; @@ -1158,7 +1146,11 @@ xfs_rtallocate_extent( xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FREXTENTS, -slen); else xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -slen); + } else if (prod > 1) { + prod = 1; + goto retry; } + *rtblock = r; return 0; } diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 355dd9e1cb64..51dd3c726608 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -40,7 +40,6 @@ xfs_rtallocate_extent( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */ int wasdel, /* was a delayed allocation extent */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock); /* out: start block allocated */ @@ -122,7 +121,7 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, #else -# define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb) (ENOSYS) +# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) # define xfs_rtfree_extent(t,b,l) (ENOSYS) # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) # define xfs_growfs_rt(mp,in) (ENOSYS) -- cgit