From 3590c4d8979bcc364e2ded95ab3966b4e436b7bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Oct 2019 16:41:34 -0700 Subject: iomap: ignore non-shared or non-data blocks in xfs_file_dirty iomap_file_dirty is used to unshare reflink blocks. Rename the function to iomap_file_unshare to better document that purpose, and skip iomaps that are not shared and don't need zeroing. This will allow us to simplify the caller. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 0f08153b4994..a9634110c783 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1442,7 +1442,7 @@ xfs_reflink_dirty_extents( flen = XFS_FSB_TO_B(mp, rlen); if (fpos + flen > isize) flen = isize - fpos; - error = iomap_file_dirty(VFS_I(ip), fpos, flen, + error = iomap_file_unshare(VFS_I(ip), fpos, flen, &xfs_iomap_ops); xfs_ilock(ip, XFS_ILOCK_EXCL); if (error) -- cgit From dd26b84640cc92a0dc30ea5feee2a7b30852ac06 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 19 Oct 2019 09:09:43 -0700 Subject: xfs: remove xfs_reflink_dirty_extents Now that iomap_file_unshare is not completely dumb we can just call it directly without iterating the extent and reflink btrees ourselves. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_reflink.c | 103 +++------------------------------------------------ 1 file changed, 5 insertions(+), 98 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a9634110c783..7fc728a8852b 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1381,85 +1381,6 @@ out_unlock: return ret; } -/* - * The user wants to preemptively CoW all shared blocks in this file, - * which enables us to turn off the reflink flag. 
Iterate all - * extents which are not prealloc/delalloc to see which ranges are - * mentioned in the refcount tree, then read those blocks into the - * pagecache, dirty them, fsync them back out, and then we can update - * the inode flag. What happens if we run out of memory? :) - */ -STATIC int -xfs_reflink_dirty_extents( - struct xfs_inode *ip, - xfs_fileoff_t fbno, - xfs_filblks_t end, - xfs_off_t isize) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_extlen_t aglen; - xfs_agblock_t rbno; - xfs_extlen_t rlen; - xfs_off_t fpos; - xfs_off_t flen; - struct xfs_bmbt_irec map[2]; - int nmaps; - int error = 0; - - while (end - fbno > 0) { - nmaps = 1; - /* - * Look for extents in the file. Skip holes, delalloc, or - * unwritten extents; they can't be reflinked. - */ - error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0); - if (error) - goto out; - if (nmaps == 0) - break; - if (!xfs_bmap_is_real_extent(&map[0])) - goto next; - - map[1] = map[0]; - while (map[1].br_blockcount) { - agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock); - aglen = map[1].br_blockcount; - - error = xfs_reflink_find_shared(mp, NULL, agno, agbno, - aglen, &rbno, &rlen, true); - if (error) - goto out; - if (rbno == NULLAGBLOCK) - break; - - /* Dirty the pages */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); - fpos = XFS_FSB_TO_B(mp, map[1].br_startoff + - (rbno - agbno)); - flen = XFS_FSB_TO_B(mp, rlen); - if (fpos + flen > isize) - flen = isize - fpos; - error = iomap_file_unshare(VFS_I(ip), fpos, flen, - &xfs_iomap_ops); - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (error) - goto out; - - map[1].br_blockcount -= (rbno - agbno + rlen); - map[1].br_startoff += (rbno - agbno + rlen); - map[1].br_startblock += (rbno - agbno + rlen); - } - -next: - fbno = map[0].br_startoff + map[0].br_blockcount; - } -out: - return error; -} - /* Does this inode need the reflink flag? 
*/ int xfs_reflink_inode_has_shared_extents( @@ -1596,10 +1517,7 @@ xfs_reflink_unshare( xfs_off_t offset, xfs_off_t len) { - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t fbno; - xfs_filblks_t end; - xfs_off_t isize; + struct inode *inode = VFS_I(ip); int error; if (!xfs_is_reflink_inode(ip)) @@ -1607,20 +1525,12 @@ xfs_reflink_unshare( trace_xfs_reflink_unshare(ip, offset, len); - inode_dio_wait(VFS_I(ip)); + inode_dio_wait(inode); - /* Try to CoW the selected ranges */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - fbno = XFS_B_TO_FSBT(mp, offset); - isize = i_size_read(VFS_I(ip)); - end = XFS_B_TO_FSB(mp, offset + len); - error = xfs_reflink_dirty_extents(ip, fbno, end, isize); + error = iomap_file_unshare(inode, offset, len, &xfs_iomap_ops); if (error) - goto out_unlock; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* Wait for the IO to finish */ - error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + goto out; + error = filemap_write_and_wait(inode->i_mapping); if (error) goto out; @@ -1628,11 +1538,8 @@ xfs_reflink_unshare( error = xfs_reflink_try_clear_inode_flag(ip); if (error) goto out; - return 0; -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; -- cgit From ffb375a8cf208a5dab818f65b633cdf368f7953c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 19 Oct 2019 09:09:43 -0700 Subject: xfs: pass two imaps to xfs_reflink_allocate_cow xfs_reflink_allocate_cow consumes the source data fork imap, and potentially returns the COW fork imap. Split the arguments in two to clear up the calling conventions and to prepare for returning a source iomap from ->iomap_begin. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_iomap.c | 8 ++++---- fs/xfs/xfs_reflink.c | 30 +++++++++++++++--------------- fs/xfs/xfs_reflink.h | 4 ++-- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f1d32bcf48bd..2cd546531b74 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -996,9 +996,8 @@ xfs_file_iomap_begin( goto out_found; /* may drop and re-acquire the ilock */ - cmap = imap; - error = xfs_reflink_allocate_cow(ip, &cmap, &shared, &lockmode, - directio); + error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, + &lockmode, directio); if (error) goto out_unlock; @@ -1011,7 +1010,8 @@ xfs_file_iomap_begin( * newly allocated address. If the data fork has a hole, copy * the COW fork mapping to avoid allocating to the data fork. */ - if (directio || imap.br_startblock == HOLESTARTBLOCK) + if (shared && + (directio || imap.br_startblock == HOLESTARTBLOCK)) imap = cmap; end_fsb = imap.br_startoff + imap.br_blockcount; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 7fc728a8852b..19a6e4644123 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -308,13 +308,13 @@ static int xfs_find_trim_cow_extent( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, + struct xfs_bmbt_irec *cmap, bool *shared, bool *found) { xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; struct xfs_iext_cursor icur; - struct xfs_bmbt_irec got; *found = false; @@ -322,23 +322,22 @@ xfs_find_trim_cow_extent( * If we don't find an overlapping extent, trim the range we need to * allocate to fit the hole we found. 
*/ - if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got)) - got.br_startoff = offset_fsb + count_fsb; - if (got.br_startoff > offset_fsb) { + if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, cmap)) + cmap->br_startoff = offset_fsb + count_fsb; + if (cmap->br_startoff > offset_fsb) { xfs_trim_extent(imap, imap->br_startoff, - got.br_startoff - imap->br_startoff); + cmap->br_startoff - imap->br_startoff); return xfs_inode_need_cow(ip, imap, shared); } *shared = true; - if (isnullstartblock(got.br_startblock)) { - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + if (isnullstartblock(cmap->br_startblock)) { + xfs_trim_extent(imap, cmap->br_startoff, cmap->br_blockcount); return 0; } /* real extent found - no need to allocate */ - xfs_trim_extent(&got, offset_fsb, count_fsb); - *imap = got; + xfs_trim_extent(cmap, offset_fsb, count_fsb); *found = true; return 0; } @@ -348,6 +347,7 @@ int xfs_reflink_allocate_cow( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, + struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode, bool convert_now) @@ -367,7 +367,7 @@ xfs_reflink_allocate_cow( xfs_ifork_init_cow(ip); } - error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); if (error || !*shared) return error; if (found) @@ -392,7 +392,7 @@ xfs_reflink_allocate_cow( /* * Check for an overlapping extent again now that we dropped the ilock. 
*/ - error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); if (error || !*shared) goto out_trans_cancel; if (found) { @@ -411,7 +411,7 @@ xfs_reflink_allocate_cow( nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, - resblks, imap, &nimaps); + resblks, cmap, &nimaps); if (error) goto out_unreserve; @@ -427,15 +427,15 @@ xfs_reflink_allocate_cow( if (nimaps == 0) return -ENOSPC; convert: - xfs_trim_extent(imap, offset_fsb, count_fsb); + xfs_trim_extent(cmap, offset_fsb, count_fsb); /* * COW fork extents are supposed to remain unwritten until we're ready * to initiate a disk write. For direct I/O we are going to write the * data and need the conversion, but for buffered writes we're done. */ - if (!convert_now || imap->br_state == XFS_EXT_NORM) + if (!convert_now || cmap->br_state == XFS_EXT_NORM) return 0; - trace_xfs_reflink_convert_cow(ip, imap); + trace_xfs_reflink_convert_cow(ip, cmap); return xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); out_unreserve: diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 28a43b7f581d..d18ad7f4fb64 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -25,8 +25,8 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, bool xfs_inode_need_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared); -extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, - struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode, +int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, + struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode, bool convert_now); extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); -- cgit From f150b4234397448c6abab8785e58a222bfd9ec00 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 19 Oct 2019 09:09:46 -0700 Subject: xfs: split the iomap ops 
for buffered vs direct writes Instead of lots of magic conditionals in the main write_begin handler this makes the intent very clear. Things will become even better once we support delayed allocations for extent size hints and realtime allocations. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 3 ++- fs/xfs/xfs_file.c | 16 ++++++++----- fs/xfs/xfs_iomap.c | 61 +++++++++++++++++--------------------------------- fs/xfs/xfs_iomap.h | 3 ++- fs/xfs/xfs_iops.c | 4 ++-- fs/xfs/xfs_reflink.c | 5 +++-- 6 files changed, 40 insertions(+), 52 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 4f443703065e..5d8632b7f549 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1113,7 +1113,8 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); + error = iomap_zero_range(VFS_I(ip), offset, len, NULL, + &xfs_buffered_write_iomap_ops); if (error) return error; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e3299ffdf090..24659667d5cb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -352,7 +352,7 @@ restart: trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); error = iomap_zero_range(inode, isize, iocb->ki_pos - isize, - NULL, &xfs_iomap_ops); + NULL, &xfs_buffered_write_iomap_ops); if (error) return error; } else @@ -552,7 +552,8 @@ xfs_file_dio_aio_write( * If unaligned, this is the only IO in-flight. Wait on it before we * release the iolock to prevent subsequent overlapping IO. 
*/ - ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops, + ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, + &xfs_dio_write_ops, is_sync_kiocb(iocb) || unaligned_io); out: xfs_iunlock(ip, iolock); @@ -592,7 +593,7 @@ xfs_file_dax_write( count = iov_iter_count(from); trace_xfs_file_dax_write(ip, count, pos); - ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -639,7 +640,8 @@ write_retry: current->backing_dev_info = inode_to_bdi(inode); trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); - ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); + ret = iomap_file_buffered_write(iocb, from, + &xfs_buffered_write_iomap_ops); if (likely(ret >= 0)) iocb->ki_pos += ret; @@ -1156,12 +1158,14 @@ __xfs_filemap_fault( ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, (write_fault && !vmf->cow_page) ? 
- &xfs_iomap_ops : &xfs_read_iomap_ops); + &xfs_direct_write_iomap_ops : + &xfs_read_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); } else { if (write_fault) - ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); + ret = iomap_page_mkwrite(vmf, + &xfs_buffered_write_iomap_ops); else ret = filemap_fault(vmf); } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index bbe0ca4ff10d..a706da8ffe22 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -719,16 +719,7 @@ relock: } static int -xfs_file_iomap_begin_delay( - struct inode *inode, - loff_t offset, - loff_t count, - unsigned flags, - struct iomap *iomap, - struct iomap *srcmap); - -static int -xfs_file_iomap_begin( +xfs_direct_write_iomap_begin( struct inode *inode, loff_t offset, loff_t length, @@ -751,13 +742,6 @@ xfs_file_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (!(flags & IOMAP_DIRECT) && !IS_DAX(inode) && - !xfs_get_extsz_hint(ip)) { - /* Reserve delalloc blocks for regular writeback. 
*/ - return xfs_file_iomap_begin_delay(inode, offset, length, flags, - iomap, srcmap); - } - /* * Lock the inode in the manner required for the specified operation and * check for as many conditions that would result in blocking as @@ -864,8 +848,12 @@ out_unlock: return error; } +const struct iomap_ops xfs_direct_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, +}; + static int -xfs_file_iomap_begin_delay( +xfs_buffered_write_iomap_begin( struct inode *inode, loff_t offset, loff_t count, @@ -884,8 +872,12 @@ xfs_file_iomap_begin_delay( int whichfork = XFS_DATA_FORK; int error = 0; + /* we can't use delayed allocations when using extent size hints */ + if (xfs_get_extsz_hint(ip)) + return xfs_direct_write_iomap_begin(inode, offset, count, + flags, iomap, srcmap); + ASSERT(!XFS_IS_REALTIME_INODE(ip)); - ASSERT(!xfs_get_extsz_hint(ip)); xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1077,18 +1069,23 @@ out_unlock: } static int -xfs_file_iomap_end_delalloc( - struct xfs_inode *ip, +xfs_buffered_write_iomap_end( + struct inode *inode, loff_t offset, loff_t length, ssize_t written, + unsigned flags, struct iomap *iomap) { + struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; + if (iomap->type != IOMAP_DELALLOC) + return 0; + /* * Behave as if the write failed if drop writes is enabled. Set the NEW * flag to force delalloc cleanup. 
@@ -1133,25 +1130,9 @@ xfs_file_iomap_end_delalloc( return 0; } -static int -xfs_file_iomap_end( - struct inode *inode, - loff_t offset, - loff_t length, - ssize_t written, - unsigned flags, - struct iomap *iomap) -{ - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && - iomap->type == IOMAP_DELALLOC) - return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written, iomap); - return 0; -} - -const struct iomap_ops xfs_iomap_ops = { - .iomap_begin = xfs_file_iomap_begin, - .iomap_end = xfs_file_iomap_end, +const struct iomap_ops xfs_buffered_write_iomap_ops = { + .iomap_begin = xfs_buffered_write_iomap_begin, + .iomap_end = xfs_buffered_write_iomap_end, }; static int diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 61b1fc3e5143..7aed28275089 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -39,7 +39,8 @@ xfs_aligned_fsb_count( return count_fsb; } -extern const struct iomap_ops xfs_iomap_ops; +extern const struct iomap_ops xfs_buffered_write_iomap_ops; +extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 9c448a54a951..329a34af8e79 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -883,10 +883,10 @@ xfs_setattr_size( if (newsize > oldsize) { trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); error = iomap_zero_range(inode, oldsize, newsize - oldsize, - &did_zeroing, &xfs_iomap_ops); + &did_zeroing, &xfs_buffered_write_iomap_ops); } else { error = iomap_truncate_page(inode, newsize, &did_zeroing, - &xfs_iomap_ops); + &xfs_buffered_write_iomap_ops); } if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 19a6e4644123..1e18b4024b82 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1270,7 +1270,7 @@ xfs_reflink_zero_posteof( trace_xfs_zero_eof(ip, isize, pos - isize); return 
iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, - &xfs_iomap_ops); + &xfs_buffered_write_iomap_ops); } /* @@ -1527,7 +1527,8 @@ xfs_reflink_unshare( inode_dio_wait(inode); - error = iomap_file_unshare(inode, offset, len, &xfs_iomap_ops); + error = iomap_file_unshare(inode, offset, len, + &xfs_buffered_write_iomap_ops); if (error) goto out; error = filemap_write_and_wait(inode->i_mapping); -- cgit From da781e64b28c1d72f84bab6a884359c9c8d522aa Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 21 Oct 2019 09:26:48 -0700 Subject: xfs: don't set bmapi total block req where minleft is xfs_bmapi_write() takes a total block requirement parameter that is passed down to the block allocation code and is used to specify the total block requirement of the associated transaction. This is used to try and select an AG that can not only satisfy the requested extent allocation, but can also accommodate subsequent allocations that might be required to complete the transaction. For example, additional bmbt block allocations may be required on insertion of the resulting extent to an inode data fork. While it's important for callers to calculate and reserve such extra blocks in the transaction, it is not necessary to pass the total value to xfs_bmapi_write() in all cases. The latter automatically sets minleft to ensure that sufficient free blocks remain after the allocation attempt to expand the format of the associated inode (i.e., such as extent to btree conversion, btree splits, etc). Therefore, any callers that pass a total block requirement of the bmap mapping length plus worst case bmbt expansion essentially specify the additional reservation requirement twice. These callers can pass a total of zero to rely on the bmapi minleft policy. 
Beyond being superfluous, the primary motivation for this change is that the total reservation logic in the bmbt code is dubious in scenarios where minlen < maxlen and a maxlen extent cannot be allocated (which is more common for data extent allocations where contiguity is not required). The total value is based on maxlen in the xfs_bmapi_write() caller. If the bmbt code falls back to an allocation between minlen and maxlen, that allocation will not succeed until total is reset to minlen, which essentially throws away any additional reservation included in total by the caller. In addition, the total value is not reset until after alignment is dropped, which means that such callers drop alignment far more aggressively than necessary. Update all callers of xfs_bmapi_write() that pass a total block value of the mapping length plus bmbt reservation to instead pass zero and rely on xfs_bmapi_minleft() to enforce the bmbt reservation requirement. This trades off slightly less conservative AG selection for the ability to preserve alignment in more scenarios. xfs_bmapi_write() callers that incorporate unrelated or additional reservations in total beyond what is already included in minleft must continue to use the former. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_bmap.c | 1 - fs/xfs/xfs_bmap_util.c | 4 ++-- fs/xfs/xfs_dquot.c | 4 ++-- fs/xfs/xfs_iomap.c | 4 ++-- fs/xfs/xfs_reflink.c | 4 ++-- fs/xfs/xfs_rtalloc.c | 3 +-- 6 files changed, 9 insertions(+), 11 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 3b300b518f69..392a809c13e8 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4511,7 +4511,6 @@ xfs_bmapi_convert_delalloc( bma.wasdel = true; bma.offset = bma.got.br_startoff; bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN); - bma.total = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); if (whichfork == XFS_COW_FORK) bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5d8632b7f549..99bf372ed551 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -964,8 +964,8 @@ xfs_alloc_file_space( xfs_trans_ijoin(tp, ip, 0); error = xfs_bmapi_write(tp, ip, startoffset_fsb, - allocatesize_fsb, alloc_type, resblks, - imapp, &nimaps); + allocatesize_fsb, alloc_type, 0, imapp, + &nimaps); if (error) goto error0; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index aeb95e7391c1..b924dbd63a7d 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -305,8 +305,8 @@ xfs_dquot_disk_alloc( /* Create the block mapping. 
*/ xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, - XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, - XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps); + XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map, + &nmaps); if (error) return error; ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index bf0c7756ac90..e8fb500e1880 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -285,8 +285,8 @@ xfs_iomap_write_direct( * caller gave to us. */ nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - bmapi_flags, resblks, imap, &nimaps); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0, + imap, &nimaps); if (error) goto out_res_cancel; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1e18b4024b82..de451235c4ee 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -410,8 +410,8 @@ xfs_reflink_allocate_cow( /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, - resblks, cmap, &nimaps); + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap, + &nimaps); if (error) goto out_unreserve; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 4a48a8c75b4f..d42b5a2047e0 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -792,8 +792,7 @@ xfs_growfs_rt_alloc( */ nmap = 1; error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, resblks, &map, - &nmap); + XFS_BMAPI_METADATA, 0, &map, &nmap); if (!error && nmap < 1) error = -ENOSPC; if (error) -- cgit