From 7f9f71be84bcab368e58020a42f6d0dd97adf0ce Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 19 Nov 2018 13:31:09 -0800
Subject: xfs: extent shifting doesn't fully invalidate page cache

The extent shifting code uses a flush and invalidate mechanism prior to
shifting extents around. This is similar to what xfs_free_file_space()
does, but it doesn't take into account things like page cache vs block
size differences, and it will fail if there is a page that is currently
busy.

xfs_flush_unmap_range() handles all of these cases, so just convert
xfs_prepare_shift() to use that mechanism rather than having its own
special sauce.

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong
---
 fs/xfs/xfs_bmap_util.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs/xfs/xfs_bmap_util.c')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5d263dfdb3bc..167ff4297e5c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1195,13 +1195,7 @@ xfs_prepare_shift(
 	 * Writeback and invalidate cache for the remainder of the file as we're
 	 * about to shift down every extent from offset to EOF.
 	 */
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1);
-	if (error)
-		return error;
-	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					offset >> PAGE_SHIFT, -1);
-	if (error)
-		return error;
+	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
 
 	/*
 	 * Clean out anything hanging around in the cow fork now that
--
cgit
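The patches in this series only show the callers of xfs_flush_unmap_range(); its body is not part of these diffs. As a rough sketch of what the helper does, based on the commit message above (round the range out to the larger of the page size and the filesystem block size, write it back, then drop it from the page cache), it looks something like this; treat it as a reconstruction, not the verbatim upstream code:

```c
/*
 * Sketch of the helper the patch switches to.  The body is not shown in
 * these diffs; this reconstruction follows the commit message: round the
 * range out to the larger of the page size and the fs block size, write
 * it back, then drop it from the page cache.  Not verbatim upstream code.
 */
static int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	/* Wait for any pending direct I/O to complete. */
	inode_dio_wait(inode);

	/* Round out to the larger of page size and fs block size. */
	rounding = max_t(xfs_off_t, 1 << PAGE_SHIFT, mp->m_sb.sb_blocksize);
	start = round_down(offset, rounding);
	end = round_up(offset + len, rounding) - 1;

	/* Write back dirty pages, then toss the now-clean page cache. */
	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}
```

Because the rounding covers whole pages (or whole blocks when they are larger), this handles the page cache vs block size mismatch that the open-coded invalidate_inode_pages2_range() call did not, and truncate_pagecache_range() cannot fail on a busy page.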
From 2c307174ab77e34645e75e12827646e044d273c3 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 19 Nov 2018 13:31:10 -0800
Subject: xfs: flush removing page cache in xfs_reflink_remap_prep

On a sub-page block size filesystem, fsx fails with data corruption
after a series of operations that copy a file with the destination
offset beyond EOF of the destination file:

8093(157 mod 256): TRUNCATE DOWN from 0x7a120 to 0x50000 ******WWWW
8094(158 mod 256): INSERT 0x25000 thru 0x25fff (0x1000 bytes)
8095(159 mod 256): COPY 0x18000 thru 0x1afff (0x3000 bytes) to 0x2f400
8096(160 mod 256): WRITE 0x5da00 thru 0x651ff (0x7800 bytes) HOLE
8097(161 mod 256): COPY 0x2000 thru 0x5fff (0x4000 bytes) to 0x6fc00

The second copy here is beyond EOF, and it is to a sub-page (4k) but
block-aligned (1k) offset. The clone runs the EOF zeroing, landing in a
pre-existing post-eof delalloc extent. This zeroes the post-eof extents
in the page cache just fine, dirtying the pages correctly.

The problem is that xfs_reflink_remap_prep() now truncates the page
cache over the range it is copying to, and rounds that down to cover
the entire start page. This removes the dirty page over the delalloc
extent from the page cache without having written it back. Hence later,
when the page cache is flushed, the page at offset 0x6f000 has not been
written back and exposes stale data, which fsx trips over less than 10
operations later.

Fix this by changing xfs_reflink_remap_prep() to use
xfs_flush_unmap_range().

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong
---
 fs/xfs/xfs_bmap_util.c |  2 +-
 fs/xfs/xfs_bmap_util.h |  3 +++
 fs/xfs/xfs_reflink.c   | 17 +++++++++++++----
 3 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs/xfs/xfs_bmap_util.c')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 167ff4297e5c..404e581f1ea1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1042,7 +1042,7 @@ out_trans_cancel:
 	goto out_unlock;
 }
 
-static int
+int
 xfs_flush_unmap_range(
 	struct xfs_inode	*ip,
 	xfs_off_t		offset,
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 87363d136bb6..7a78229cf1a7 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -80,4 +80,7 @@ int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
 			  int whichfork, xfs_extnum_t *nextents,
 			  xfs_filblks_t *count);
 
+int	xfs_flush_unmap_range(struct xfs_inode *ip, xfs_off_t offset,
+			      xfs_off_t len);
+
 #endif	/* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c56bdbfcf7ae..322a852ce284 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1352,10 +1352,19 @@ xfs_reflink_remap_prep(
 	if (ret)
 		goto out_unlock;
 
-	/* Zap any page cache for the destination file's range. */
-	truncate_inode_pages_range(&inode_out->i_data,
-			round_down(pos_out, PAGE_SIZE),
-			round_up(pos_out + *len, PAGE_SIZE) - 1);
+	/*
+	 * If pos_out > EOF, we may have dirtied blocks between EOF and
+	 * pos_out. In that case, we need to extend the flush and unmap to cover
+	 * from EOF to the end of the copy length.
+	 */
+	if (pos_out > XFS_ISIZE(dest)) {
+		loff_t	flen = *len + (pos_out - XFS_ISIZE(dest));
+		ret = xfs_flush_unmap_range(dest, XFS_ISIZE(dest), flen);
+	} else {
+		ret = xfs_flush_unmap_range(dest, pos_out, *len);
+	}
+	if (ret)
+		goto out_unlock;
 
 	return 1;
 out_unlock:
--
cgit
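To make the rounding problem concrete: with 4k pages, the old truncate_inode_pages_range() call rounded the start of the zap range down from the copy destination (0x6fc00 in the trace above) to 0x6f000, which is exactly the page that the post-EOF zeroing had just dirtied. A small userspace sketch of that arithmetic follows; PAGE_SIZE and the rounding helpers are redefined locally purely for illustration:

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE		4096ULL
#define round_down(x, y)	((x) & ~((y) - 1))
#define round_up(x, y)		((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
	/* Offsets from the fsx trace: a copy of 0x4000 bytes to 0x6fc00. */
	uint64_t pos_out = 0x6fc00;
	uint64_t len = 0x4000;

	/* Range the old code handed to truncate_inode_pages_range(). */
	uint64_t start = round_down(pos_out, PAGE_SIZE);
	uint64_t end = round_up(pos_out + len, PAGE_SIZE) - 1;

	/*
	 * start comes out as 0x6f000 -- the page that post-EOF zeroing had
	 * just dirtied -- so that dirty data was dropped without ever being
	 * written back.
	 */
	printf("zap range: 0x%llx - 0x%llx\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}
```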
From a579121f94aba4e8bad1a121a0fad050d6925296 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong"
Date: Tue, 27 Nov 2018 11:01:43 -0800
Subject: xfs: fix PAGE_MASK usage in xfs_free_file_space

In commit e53c4b598, I *tried* to teach xfs to force writeback when we
fzero/fpunch right up to EOF so that if EOF is in the middle of a page,
the post-EOF part of the page gets zeroed before we return to
userspace. Unfortunately, I missed the part where PAGE_MASK is
~(PAGE_SIZE - 1), which means that we totally fail to zero if we're
fpunching and EOF is within the first page. Worse yet, the same
PAGE_MASK thinko plagues the filemap_write_and_wait_range call, so we'd
initiate writeback of the entire file, which (mostly) masked the
thinko.

Drop the tricky PAGE_MASK and replace it with correct usage of
PAGE_SIZE and the proper rounding macros.

Fixes: e53c4b598 ("xfs: ensure post-EOF zeroing happens after zeroing part of a file")
Signed-off-by: Darrick J. Wong
Reviewed-by: Dave Chinner
Reviewed-by: Christoph Hellwig
---
 fs/xfs/xfs_bmap_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/xfs/xfs_bmap_util.c')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 404e581f1ea1..1ee8c5539fa4 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1126,9 +1126,9 @@ xfs_free_file_space(
 	 * page could be mmap'd and iomap_zero_range doesn't do that for us.
 	 * Writeback of the eof page will do this, albeit clumsily.
 	 */
-	if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
+	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-				(offset + len) & ~PAGE_MASK, LLONG_MAX);
+				round_down(offset + len, PAGE_SIZE), LLONG_MAX);
 	}
 
 	return error;
--
cgit
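A quick userspace demonstration of the two failure modes described in the commit message: the old test skips writeback entirely when EOF sits inside the first page, and the old writeback start collapses to the offset within the page instead of the start of the EOF page, so writeback covers nearly the whole file. PAGE_MASK and the helpers are redefined locally for illustration only:

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE		4096ULL
#define PAGE_MASK		(~(PAGE_SIZE - 1))
#define offset_in_page(p)	((uint64_t)(p) & (PAGE_SIZE - 1))
#define round_down(x, y)	((x) & ~((y) - 1))

static void check(uint64_t eof)
{
	/* Old vs. new test for "EOF sits in the middle of a page". */
	int old_cond = ((eof & PAGE_MASK) != 0);	/* really tests "EOF >= one page" */
	int new_cond = (offset_in_page(eof) > 0);

	/* Old vs. new start offset for the writeback range. */
	uint64_t old_start = eof & ~PAGE_MASK;		/* offset *within* the page */
	uint64_t new_start = round_down(eof, PAGE_SIZE);

	printf("eof=0x%llx  old: cond=%d start=0x%llx  new: cond=%d start=0x%llx\n",
	       (unsigned long long)eof, old_cond, (unsigned long long)old_start,
	       new_cond, (unsigned long long)new_start);
}

int main(void)
{
	check(0xa00);	/* EOF inside the first page: old code never writes back */
	check(0x65200);	/* EOF mid-file: old code starts writeback at 0x200 */
	return 0;
}
```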