aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2022-03-23 16:19:23 +0000
committerDavid Sterba <dsterba@suse.com>2022-05-16 17:03:09 +0200
commitb023e67512accd01d6daadb0244b3b430f3b2b6e (patch)
tree014c735969278d9148fda8005813b3dff2aba510
parentb0a66a3137bde6e011e2b14b135e8ea18facb68c (diff)
btrfs: avoid blocking on page locks with nowait dio on compressed range
If we are doing NOWAIT direct IO read/write and our inode has compressed extents, we call filemap_fdatawrite_range() against the range in order to wait for compressed writeback to complete, since the generic code at iomap_dio_rw() calls filemap_write_and_wait_range() once, which is not enough to wait for compressed writeback to complete. This call to filemap_fdatawrite_range() can block on page locks, since the first writepages() on a range that we will try to compress results only in queuing a work to compress the data while holding the pages locked. Even though the generic code at iomap_dio_rw() will do the right thing and return -EAGAIN for NOWAIT requests in case there are pages in the range, we can still end up at btrfs_dio_iomap_begin() with pages in the range because either of the following can happen: 1) Memory mapped writes, as we haven't locked the range yet; 2) Buffered reads might have started, which lock the pages, and we do the filemap_fdatawrite_range() call before locking the file range. So don't call filemap_fdatawrite_range() at btrfs_dio_iomap_begin() if we are doing a NOWAIT read/write. Instead call filemap_range_needs_writeback() to check if there are any locked, dirty, or under writeback pages, and return -EAGAIN if that's the case. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/inode.c34
1 files changed, 26 insertions, 8 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c4b68be7edd7..a240b7fff984 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7532,17 +7532,35 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
lockend = start + len - 1;
/*
- * The generic stuff only does filemap_write_and_wait_range, which
- * isn't enough if we've written compressed pages to this area, so we
- * need to flush the dirty pages again to make absolutely sure that any
- * outstanding dirty pages are on disk.
+ * iomap_dio_rw() only does filemap_write_and_wait_range(), which isn't
+ * enough if we've written compressed pages to this area, so we need to
+ * flush the dirty pages again to make absolutely sure that any
+ * outstanding dirty pages are on disk - the first flush only starts
+ * compression on the data, while keeping the pages locked, so by the
+ * time the second flush returns we know bios for the compressed pages
+ * were submitted and finished, and the pages no longer under writeback.
+ *
+ * If we have a NOWAIT request and we have any pages in the range that
+ * are locked, likely due to compression still in progress, we don't want
+ * to block on page locks. We also don't want to block on pages marked as
+ * dirty or under writeback (same as for the non-compression case).
+ * iomap_dio_rw() did the same check, but after that and before we got
+ * here, mmap'ed writes may have happened or buffered reads started
+ * (readpage() and readahead(), which lock pages), as we haven't locked
+ * the file range yet.
*/
if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags)) {
- ret = filemap_fdatawrite_range(inode->i_mapping, start,
- start + length - 1);
- if (ret)
- return ret;
+ if (flags & IOMAP_NOWAIT) {
+ if (filemap_range_needs_writeback(inode->i_mapping,
+ lockstart, lockend))
+ return -EAGAIN;
+ } else {
+ ret = filemap_fdatawrite_range(inode->i_mapping, start,
+ start + length - 1);
+ if (ret)
+ return ret;
+ }
}
dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS);