author     Christian Brauner <[email protected]>  2024-09-03 09:59:23 +0200
committer  Christian Brauner <[email protected]>  2024-09-03 15:01:24 +0200
commit     3d693c1811e3b57438dc6a4e26a420da6def09b4 (patch)
tree       2157d68e1fa5203c13cd5e87c1863bffe8424cd8
parent     6f634eb080161baa4811a39f5ec07923ede092bc (diff)
parent     7d9b474ee4cc375393d13e69d60a30f9b7b46a3a (diff)
Merge patch series "iomap: flush dirty cache over unwritten mappings on zero range"
Brian Foster <[email protected]> says:

Two fixes for iomap zero range flushes.

* patches from https://lore.kernel.org/r/[email protected]:
  iomap: make zero range flush conditional on unwritten mappings
  iomap: fix handling of dirty folios over unwritten extents

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
-rw-r--r--  fs/iomap/buffered-io.c | 63
-rw-r--r--  fs/xfs/xfs_iops.c      | 10
2 files changed, 59 insertions(+), 14 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index e79d11701553..de47698a029a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1394,16 +1394,53 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 }
 EXPORT_SYMBOL_GPL(iomap_file_unshare);
 
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
+/*
+ * Flush the remaining range of the iter and mark the current mapping stale.
+ * This is used when zero range sees an unwritten mapping that may have had
+ * dirty pagecache over it.
+ */
+static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
+{
+	struct address_space *mapping = i->inode->i_mapping;
+	loff_t end = i->pos + i->len - 1;
+
+	i->iomap.flags |= IOMAP_F_STALE;
+	return filemap_write_and_wait_range(mapping, i->pos, end);
+}
+
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
+		bool *range_dirty)
 {
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
 	loff_t written = 0;
 
-	/* already zeroed? we're done. */
-	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
+	/*
+	 * We must zero subranges of unwritten mappings that might be dirty in
+	 * pagecache from previous writes. We only know whether the entire range
+	 * was clean or not, however, and dirty folios may have been written
+	 * back or reclaimed at any point after mapping lookup.
+	 *
+	 * The easiest way to deal with this is to flush pagecache to trigger
+	 * any pending unwritten conversions and then grab the updated extents
+	 * from the fs. The flush may change the current mapping, so mark it
+	 * stale for the iterator to remap it for the next pass to handle
+	 * properly.
+	 *
+	 * Note that holes are treated the same as unwritten because zero range
+	 * is (ab)used for partial folio zeroing in some cases. Hole backed
+	 * post-eof ranges can be dirtied via mapped write and the flush
+	 * triggers writeback time post-eof zeroing.
+	 */
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
+		if (*range_dirty) {
+			*range_dirty = false;
+			return iomap_zero_iter_flush_and_stale(iter);
+		}
+		/* range is clean and already zeroed, nothing to do */
 		return length;
+	}
 
 	do {
 		struct folio *folio;
@@ -1451,9 +1488,27 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 		.flags		= IOMAP_ZERO,
 	};
 	int ret;
+	bool range_dirty;
+
+	/*
+	 * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
+	 * pagecache must be flushed to ensure stale data from previous
+	 * buffered writes is not exposed. A flush is only required for certain
+	 * types of mappings, but checking pagecache after mapping lookup is
+	 * racy with writeback and reclaim.
+	 *
+	 * Therefore, check the entire range first and pass along whether any
+	 * part of it is dirty. If so and an underlying mapping warrants it,
+	 * flush the cache at that point. This trades off the occasional false
+	 * positive (and spurious flush, if the dirty data and mapping don't
+	 * happen to overlap) for simplicity in handling a relatively uncommon
+	 * situation.
+	 */
+	range_dirty = filemap_range_needs_writeback(inode->i_mapping,
+			pos, pos + len - 1);
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.processed = iomap_zero_iter(&iter, did_zero);
+		iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_zero_range);
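
The comments in the hunks above describe the window this series closes: a
buffered write leaves dirty pagecache over an unwritten mapping, and a
subsequent zero range sees IOMAP_UNWRITTEN and used to return early without
zeroing the dirty folios. A minimal userspace sketch of that condition is
below. It is illustrative, not from the patch: the file name and offsets are
made up, and whether this exact sequence reaches iomap_zero_range() depends
on the filesystem's fallocate paths.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0644);

		if (fd < 0)
			return 1;

		/* preallocate unwritten extents, no data on disk yet */
		if (fallocate(fd, 0, 0, 1 << 20))
			return 1;

		/* dirty one folio over the unwritten mapping, no flush */
		memset(buf, 0xab, sizeof(buf));
		if (pwrite(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
			return 1;

		/*
		 * Zero a subrange of the dirty folio. Before this series,
		 * the unwritten mapping made iomap_zero_iter() return
		 * early, skipping the dirty pagecache and leaving 0xab to
		 * be written back later.
		 */
		if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 1024, 2048))
			return 1;

		if (pread(fd, buf, 1, 1024) != 1)
			return 1;
		printf("byte at 1024: 0x%02x (expect 0x00)\n",
		       (unsigned char)buf[0]);
		close(fd);
		return 0;
	}
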
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 6483b4c4cf35..ee79cf161312 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -870,16 +870,6 @@ xfs_setattr_size(
 		error = xfs_zero_range(ip, oldsize, newsize - oldsize,
 				&did_zeroing);
 	} else {
-		/*
-		 * iomap won't detect a dirty page over an unwritten block (or a
-		 * cow block over a hole) and subsequently skips zeroing the
-		 * newly post-EOF portion of the page. Flush the new EOF to
-		 * convert the block before the pagecache truncate.
-		 */
-		error = filemap_write_and_wait_range(inode->i_mapping, newsize,
-				newsize);
-		if (error)
-			return error;
 		error = xfs_truncate_page(ip, newsize, &did_zeroing);
 	}
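
For context, the workaround removed above covered the shrinking-truncate
case: xfs_setattr_size() calls xfs_truncate_page(), which zeroes the
post-EOF part of the folio through iomap_zero_range(), and that zeroing
used to be skipped over unwritten blocks unless XFS flushed the new EOF
first. A hedged sketch of that sequence, with the same illustrative file
name and offsets as the sketch above:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/*
	 * Illustrative sketch (not from the patch): shrinking truncate
	 * into a dirty folio backed by an unwritten block, the case the
	 * removed XFS-side flush used to cover before iomap_zero_range()
	 * learned to flush conditionally on its own.
	 */
	int main(void)
	{
		char buf[4096];
		int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0644);

		if (fd < 0)
			return 1;
		if (fallocate(fd, 0, 0, 1 << 20))	/* unwritten extents */
			return 1;
		memset(buf, 0xab, sizeof(buf));
		if (pwrite(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
			return 1;			/* dirty folio, no flush */

		/*
		 * Truncate into the dirty folio. The post-EOF part of the
		 * folio must be zeroed so stale 0xab bytes cannot reappear
		 * if the file is later extended again.
		 */
		if (ftruncate(fd, 2048))
			return 1;
		close(fd);
		return 0;
	}
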