author:    Christian Brauner <[email protected]>  2024-09-03 09:59:23 +0200
committer: Christian Brauner <[email protected]>  2024-09-03 15:01:24 +0200
commit:    3d693c1811e3b57438dc6a4e26a420da6def09b4
tree:      2157d68e1fa5203c13cd5e87c1863bffe8424cd8
parent:    6f634eb080161baa4811a39f5ec07923ede092bc
parent:    7d9b474ee4cc375393d13e69d60a30f9b7b46a3a
Merge patch series "iomap: flush dirty cache over unwritten mappings on zero range"
Brian Foster <[email protected]> says:
Two fixes for iomap zero range flushes.
* patches from https://lore.kernel.org/r/[email protected]:
iomap: make zero range flush conditional on unwritten mappings
iomap: fix handling of dirty folios over unwritten extents
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
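For context, here is a minimal user-space sketch of the sequence this series guards against (a hypothetical reproducer, not taken from the patch series; the file name, 4k block size, and offsets are illustrative assumptions): a buffered write leaves dirty pagecache over a still-unwritten extent, and a partial-block truncate whose zero range trusts the unwritten mapping can skip zeroing and later expose the stale bytes once writeback converts the extent.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Preallocate one block; on XFS this is an unwritten extent. */
	if (fallocate(fd, 0, 0, 4096)) {
		perror("fallocate");
		return 1;
	}

	/*
	 * Dirty the pagecache over the block. The extent stays unwritten
	 * until writeback converts it, so a mapping lookup still reports
	 * IOMAP_UNWRITTEN here.
	 */
	memset(buf, 0xab, sizeof(buf));
	if (pwrite(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf)) {
		perror("pwrite");
		return 1;
	}

	/*
	 * Truncate to mid-block and back up. The truncate down must zero
	 * the now post-EOF tail of the folio; a zero range that skips
	 * "already zeroed" unwritten mappings would leave the 0xab bytes
	 * in place for writeback to expose.
	 */
	if (ftruncate(fd, 2048) || ftruncate(fd, 4096)) {
		perror("ftruncate");
		return 1;
	}

	if (pread(fd, buf, 2048, 2048) != 2048) {
		perror("pread");
		return 1;
	}
	printf("tail byte: 0x%02x (should be 0x00)\n", (unsigned char)buf[0]);
	close(fd);
	return 0;
}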
-rw-r--r--	fs/iomap/buffered-io.c	63
-rw-r--r--	fs/xfs/xfs_iops.c	10
2 files changed, 59 insertions, 14 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index e79d11701553..de47698a029a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1394,16 +1394,53 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 }
 EXPORT_SYMBOL_GPL(iomap_file_unshare);
 
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
+/*
+ * Flush the remaining range of the iter and mark the current mapping stale.
+ * This is used when zero range sees an unwritten mapping that may have had
+ * dirty pagecache over it.
+ */
+static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
+{
+	struct address_space *mapping = i->inode->i_mapping;
+	loff_t end = i->pos + i->len - 1;
+
+	i->iomap.flags |= IOMAP_F_STALE;
+	return filemap_write_and_wait_range(mapping, i->pos, end);
+}
+
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
+		bool *range_dirty)
 {
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
 	loff_t written = 0;
 
-	/* already zeroed? we're done. */
-	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
+	/*
+	 * We must zero subranges of unwritten mappings that might be dirty in
+	 * pagecache from previous writes. We only know whether the entire range
+	 * was clean or not, however, and dirty folios may have been written
+	 * back or reclaimed at any point after mapping lookup.
+	 *
+	 * The easiest way to deal with this is to flush pagecache to trigger
+	 * any pending unwritten conversions and then grab the updated extents
+	 * from the fs. The flush may change the current mapping, so mark it
+	 * stale for the iterator to remap it for the next pass to handle
+	 * properly.
+	 *
+	 * Note that holes are treated the same as unwritten because zero range
+	 * is (ab)used for partial folio zeroing in some cases. Hole backed
+	 * post-eof ranges can be dirtied via mapped write and the flush
+	 * triggers writeback time post-eof zeroing.
+	 */
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
+		if (*range_dirty) {
+			*range_dirty = false;
+			return iomap_zero_iter_flush_and_stale(iter);
+		}
+		/* range is clean and already zeroed, nothing to do */
 		return length;
+	}
 
 	do {
 		struct folio *folio;
@@ -1451,9 +1488,27 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 		.flags		= IOMAP_ZERO,
 	};
 	int ret;
+	bool range_dirty;
+
+	/*
+	 * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
+	 * pagecache must be flushed to ensure stale data from previous
+	 * buffered writes is not exposed. A flush is only required for certain
+	 * types of mappings, but checking pagecache after mapping lookup is
+	 * racy with writeback and reclaim.
+	 *
+	 * Therefore, check the entire range first and pass along whether any
+	 * part of it is dirty. If so and an underlying mapping warrants it,
+	 * flush the cache at that point. This trades off the occasional false
+	 * positive (and spurious flush, if the dirty data and mapping don't
+	 * happen to overlap) for simplicity in handling a relatively uncommon
+	 * situation.
+	 */
+	range_dirty = filemap_range_needs_writeback(inode->i_mapping,
+					pos, pos + len - 1);
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.processed = iomap_zero_iter(&iter, did_zero);
+		iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_zero_range);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 6483b4c4cf35..ee79cf161312 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -870,16 +870,6 @@ xfs_setattr_size(
 		error = xfs_zero_range(ip, oldsize, newsize - oldsize,
 				&did_zeroing);
 	} else {
-		/*
-		 * iomap won't detect a dirty page over an unwritten block (or a
-		 * cow block over a hole) and subsequently skips zeroing the
-		 * newly post-EOF portion of the page. Flush the new EOF to
-		 * convert the block before the pagecache truncate.
-		 */
-		error = filemap_write_and_wait_range(inode->i_mapping, newsize,
-						     newsize);
-		if (error)
-			return error;
 		error = xfs_truncate_page(ip, newsize, &did_zeroing);
 	}
 
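A note on the IOMAP_F_STALE handshake used above: when a pass flags the current mapping stale, iomap_iter() discards it and asks the filesystem to remap the remaining range, so the next pass observes the post-flush extent state. Below is a toy stand-alone model of that two-pass loop (a plain C simulation, not kernel code; all names in it are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

enum map_type { MAP_UNWRITTEN, MAP_WRITTEN };

struct mapping {
	enum map_type type;
	bool stale;		/* stand-in for IOMAP_F_STALE */
};

/* Simulated filesystem state: a flush converts the unwritten extent. */
static bool flushed;

static void lookup(struct mapping *m)
{
	m->type = flushed ? MAP_WRITTEN : MAP_UNWRITTEN;
	m->stale = false;
}

static void zero_pass(struct mapping *m, bool *range_dirty)
{
	if (m->type == MAP_UNWRITTEN && *range_dirty) {
		/*
		 * Mirror iomap_zero_iter_flush_and_stale(): flush, then
		 * mark the mapping stale to force a remap on the next pass.
		 */
		flushed = true;
		*range_dirty = false;
		m->stale = true;
		return;
	}
	puts(m->type == MAP_UNWRITTEN ?
	     "clean unwritten mapping: skip" : "written mapping: zero folios");
}

int main(void)
{
	struct mapping m;
	/* filemap_range_needs_writeback() reported dirty pagecache. */
	bool range_dirty = true;

	do {
		lookup(&m);	/* iomap_iter() remaps while stale */
		zero_pass(&m, &range_dirty);
	} while (m.stale);
	return 0;
}

The first pass flushes and requests a remap; the second sees the converted mapping and does the actual folio zeroing, mirroring how iomap_zero_iter() returns through iomap_zero_iter_flush_and_stale() and lets the iterator re-drive the mapping lookup.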