From b2ccab3199aa7cea9154d80ea2585312c5f6eba0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Apr 2023 18:59:53 -0700 Subject: xfs: pass per-ag references to xfs_free_extent Pass a reference to the per-AG structure to xfs_free_extent. Most callers already have one, so we can eliminate unnecessary lookups. The one exception to this is the EFI code, which the next patch will fix. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/xfs/libxfs/xfs_ag.c') diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 86696a1c6891..ae45f546ed86 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -1043,10 +1043,8 @@ xfs_ag_extend_space( if (error) return error; - error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno, - be32_to_cpu(agf->agf_length) - len), - len, &XFS_RMAP_OINFO_SKIP_UPDATE, - XFS_AG_RESV_NONE); + error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, + len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); if (error) return error; -- cgit From 9b2e5a234c89f097ec36f922763dfa1465dc06f8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Apr 2023 18:59:55 -0700 Subject: xfs: create traced helper to get extra perag references There are a few places in the XFS codebase where a caller has either an active or a passive reference to a perag structure and wants to give a passive reference to some other piece of code. Btree cursor creation and inode walks are good examples of this. Replace the open-coded logic with a helper to do this. The new function adds a few safeguards -- it checks that there's at least one reference to the perag structure passed in, and it records the refcount bump in the ftrace information. This makes it much easier to debug perag refcounting problems. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.c | 13 +++++++++++++ fs/xfs/libxfs/xfs_ag.h | 1 + fs/xfs/libxfs/xfs_alloc_btree.c | 4 +--- fs/xfs/libxfs/xfs_ialloc_btree.c | 4 +--- fs/xfs/libxfs/xfs_refcount_btree.c | 5 +---- fs/xfs/libxfs/xfs_rmap_btree.c | 5 +---- fs/xfs/xfs_iunlink_item.c | 4 +--- fs/xfs/xfs_iwalk.c | 5 ++--- fs/xfs/xfs_trace.h | 1 + 9 files changed, 22 insertions(+), 20 deletions(-) (limited to 'fs/xfs/libxfs/xfs_ag.c') diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index ae45f546ed86..2d8910046ed9 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -81,6 +81,19 @@ xfs_perag_get_tag( return pag; } +/* Get a passive reference to the given perag. */ +struct xfs_perag * +xfs_perag_hold( + struct xfs_perag *pag) +{ + ASSERT(atomic_read(&pag->pag_ref) > 0 || + atomic_read(&pag->pag_active_ref) > 0); + + trace_xfs_perag_hold(pag, _RET_IP_); + atomic_inc(&pag->pag_ref); + return pag; +} + void xfs_perag_put( struct xfs_perag *pag) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 5e18536dfdce..8092eaba977d 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -134,6 +134,7 @@ void xfs_free_perag(struct xfs_mount *mp); struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int tag); +struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag); /* Active AG references */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 0f29c7b1b39f..8e8416c14cec 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -492,9 +492,7 @@ xfs_allocbt_init_common( cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); } - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; + cur->bc_ag.pag = xfs_perag_hold(pag); if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 1d2af50ac95b..ad6c521f05eb 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -450,9 +450,7 @@ xfs_inobt_init_common( if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; + cur->bc_ag.pag = xfs_perag_hold(pag); return cur; } diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 749e837de98d..03d2b01487a1 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -340,10 +340,7 @@ xfs_refcountbt_init_common( cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; - + cur->bc_ag.pag = xfs_perag_hold(pag); cur->bc_ag.refc.nr_ops = 0; cur->bc_ag.refc.shape_changes = 0; cur->bc_ops = &xfs_refcountbt_ops; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index d3285684bb5e..56d074b42660 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -460,10 +460,7 @@ xfs_rmapbt_init_common( cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); cur->bc_ops = &xfs_rmapbt_ops; - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; - + cur->bc_ag.pag = xfs_perag_hold(pag); return cur; } diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c index 43005ce8bd48..2ddccb172fa0 100644 --- a/fs/xfs/xfs_iunlink_item.c +++ b/fs/xfs/xfs_iunlink_item.c @@ -168,9 +168,7 @@ xfs_iunlink_log_inode( iup->ip = ip; iup->next_agino = next_agino; iup->old_agino = ip->i_next_unlinked; - - atomic_inc(&pag->pag_ref); - iup->pag = pag; + iup->pag = xfs_perag_hold(pag); xfs_trans_add_item(tp, &iup->item); tp->t_flags |= XFS_TRANS_DIRTY; diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 21be93bf006d..b3275e8d47b6 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -667,11 +667,10 @@ xfs_iwalk_threaded( iwag->mp = mp; /* - * perag is being handed off to async work, so take another + * perag is being handed off to async work, so take a passive * reference for the async work to release. */ - atomic_inc(&pag->pag_ref); - iwag->pag = pag; + iwag->pag = xfs_perag_hold(pag); iwag->iwalk_fn = iwalk_fn; iwag->data = data; iwag->startino = startino; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 9c0006c55fec..db09bb771765 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -190,6 +190,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ TP_ARGS(pag, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); +DEFINE_PERAG_REF_EVENT(xfs_perag_hold); DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_grab); DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag); -- cgit From d5c88131dbf01a30a222ad82d58e0c21a15f0d8e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Apr 2023 18:59:58 -0700 Subject: xfs: allow queued AG intents to drain before scrubbing When a writer thread executes a chain of log intent items, the AG header buffer locks will cycle during a transaction roll to get from one intent item to the next in a chain. Although scrub takes all AG header buffer locks, this isn't sufficient to guard against scrub checking an AG while that writer thread is in the middle of finishing a chain because there's no higher level locking primitive guarding allocation groups. When there's a collision, cross-referencing between data structures (e.g. rmapbt and refcountbt) yields false corruption events; if repair is running, this results in incorrect repairs, which is catastrophic. Fix this by adding to the perag structure the count of active intents and make scrub wait until it has both AG header buffer locks and the intent counter reaches zero. One quirk of the drain code is that deferred bmap updates also bump and drop the intent counter. A fundamental decision made during the design phase of the reverse mapping feature is that updates to the rmapbt records are always made by the same code that updates the primary metadata. In other words, callers of bmapi functions expect that the bmapi functions will queue deferred rmap updates. Some parts of the reflink code queue deferred refcount (CUI) and bmap (BUI) updates in the same head transaction, but the deferred work manager completely finishes the CUI before the BUI work is started. As a result, the CUI drops the intent count long before the deferred rmap (RUI) update even has a chance to bump the intent count. The only way to keep the intent count elevated between the CUI and RUI is for the BUI to bump the counter until the RUI has been created. A second quirk of the intent drain code is that deferred work items must increment the intent counter as soon as the work item is added to the transaction. When a BUI completes and queues an RUI, the RUI must increment the counter before the BUI decrements it. The only way to accomplish this is to require that the counter be bumped as soon as the deferred work item is created in memory. In the next patches we'll improve on this facility, but this patch provides the basic functionality. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/Kconfig | 4 ++ fs/xfs/Makefile | 2 + fs/xfs/libxfs/xfs_ag.c | 4 ++ fs/xfs/libxfs/xfs_ag.h | 8 +++ fs/xfs/libxfs/xfs_defer.c | 6 +- fs/xfs/scrub/common.c | 111 +++++++++++++++++++++++++++++++---- fs/xfs/scrub/health.c | 2 + fs/xfs/scrub/refcount.c | 2 + fs/xfs/xfs_bmap_item.c | 12 +++- fs/xfs/xfs_drain.c | 140 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_drain.h | 84 +++++++++++++++++++++++++++ fs/xfs/xfs_extfree_item.c | 4 +- fs/xfs/xfs_linux.h | 1 + fs/xfs/xfs_refcount_item.c | 4 +- fs/xfs/xfs_rmap_item.c | 4 +- fs/xfs/xfs_trace.h | 71 +++++++++++++++++++++++ 16 files changed, 438 insertions(+), 21 deletions(-) create mode 100644 fs/xfs/xfs_drain.c create mode 100644 fs/xfs/xfs_drain.h (limited to 'fs/xfs/libxfs/xfs_ag.c') diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 9fac5ea8d0e4..ab24e683b440 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -93,10 +93,14 @@ config XFS_RT If unsure, say N. +config XFS_DRAIN_INTENTS + bool + config XFS_ONLINE_SCRUB bool "XFS online metadata check support" default n depends on XFS_FS + select XFS_DRAIN_INTENTS help If you say Y here you will be able to check metadata on a mounted XFS filesystem. This feature is intended to reduce diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 92d88dc3c9f7..3bdbc838c4d1 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -136,6 +136,8 @@ ifeq ($(CONFIG_MEMORY_FAILURE),y) xfs-$(CONFIG_FS_DAX) += xfs_notify_failure.o endif +xfs-$(CONFIG_XFS_DRAIN_INTENTS) += xfs_drain.o + # online scrub/repair ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 2d8910046ed9..1b078bbbf225 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -260,6 +260,7 @@ xfs_free_perag( spin_unlock(&mp->m_perag_lock); ASSERT(pag); XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); + xfs_defer_drain_free(&pag->pag_intents_drain); cancel_delayed_work_sync(&pag->pag_blockgc_work); xfs_buf_hash_destroy(pag); @@ -385,6 +386,7 @@ xfs_initialize_perag( spin_lock_init(&pag->pag_state_lock); INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + xfs_defer_drain_init(&pag->pag_intents_drain); init_waitqueue_head(&pag->pagb_wait); init_waitqueue_head(&pag->pag_active_wq); pag->pagb_count = 0; @@ -421,6 +423,7 @@ xfs_initialize_perag( return 0; out_remove_pag: + xfs_defer_drain_free(&pag->pag_intents_drain); radix_tree_delete(&mp->m_perag_tree, index); out_free_pag: kmem_free(pag); @@ -431,6 +434,7 @@ out_unwind_new_pags: if (!pag) break; xfs_buf_hash_destroy(pag); + xfs_defer_drain_free(&pag->pag_intents_drain); kmem_free(pag); } return error; diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 8092eaba977d..2e0aef87d633 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -101,6 +101,14 @@ struct xfs_perag { /* background prealloc block trimming */ struct delayed_work pag_blockgc_work; + /* + * We use xfs_drain to track the number of deferred log intent items + * that have been queued (but not yet processed) so that waiters (e.g. + * scrub) will not lock resources when other threads are in the middle + * of processing a chain of intent items only to find momentary + * inconsistencies. + */ + struct xfs_defer_drain pag_intents_drain; #endif /* __KERNEL__ */ }; diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 5a321b783398..bcfb6a4203cd 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -397,6 +397,7 @@ xfs_defer_cancel_list( list_for_each_safe(pwi, n, &dfp->dfp_work) { list_del(pwi); dfp->dfp_count--; + trace_xfs_defer_cancel_item(mp, dfp, pwi); ops->cancel_item(pwi); } ASSERT(dfp->dfp_count == 0); @@ -476,6 +477,7 @@ xfs_defer_finish_one( list_for_each_safe(li, n, &dfp->dfp_work) { list_del(li); dfp->dfp_count--; + trace_xfs_defer_finish_item(tp->t_mountp, dfp, li); error = ops->finish_item(tp, dfp->dfp_done, li, &state); if (error == -EAGAIN) { int ret; @@ -623,7 +625,7 @@ xfs_defer_add( struct list_head *li) { struct xfs_defer_pending *dfp = NULL; - const struct xfs_defer_op_type *ops; + const struct xfs_defer_op_type *ops = defer_op_types[type]; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX); @@ -636,7 +638,6 @@ xfs_defer_add( if (!list_empty(&tp->t_dfops)) { dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending, dfp_list); - ops = defer_op_types[dfp->dfp_type]; if (dfp->dfp_type != type || (ops->max_items && dfp->dfp_count >= ops->max_items)) dfp = NULL; @@ -653,6 +654,7 @@ xfs_defer_add( } list_add_tail(li, &dfp->dfp_work); + trace_xfs_defer_add_item(tp->t_mountp, dfp, li); dfp->dfp_count++; } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 597e6aca8628..2a496d1699a3 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -396,26 +396,19 @@ want_ag_read_header_failure( } /* - * Grab the perag structure and all the headers for an AG. + * Grab the AG header buffers for the attached perag structure. * * The headers should be released by xchk_ag_free, but as a fail safe we attach * all the buffers we grab to the scrub transaction so they'll all be freed - * when we cancel it. Returns ENOENT if we can't grab the perag structure. + * when we cancel it. */ -int -xchk_ag_read_headers( +static inline int +xchk_perag_read_headers( struct xfs_scrub *sc, - xfs_agnumber_t agno, struct xchk_ag *sa) { - struct xfs_mount *mp = sc->mp; int error; - ASSERT(!sa->pag); - sa->pag = xfs_perag_get(mp, agno); - if (!sa->pag) - return -ENOENT; - error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) return error; @@ -427,6 +420,102 @@ xchk_ag_read_headers( return 0; } +/* + * Grab the AG headers for the attached perag structure and wait for pending + * intents to drain. + */ +static int +xchk_perag_drain_and_lock( + struct xfs_scrub *sc) +{ + struct xchk_ag *sa = &sc->sa; + int error = 0; + + ASSERT(sa->pag != NULL); + ASSERT(sa->agi_bp == NULL); + ASSERT(sa->agf_bp == NULL); + + do { + if (xchk_should_terminate(sc, &error)) + return error; + + error = xchk_perag_read_headers(sc, sa); + if (error) + return error; + + /* + * If we've grabbed an inode for scrubbing then we assume that + * holding its ILOCK will suffice to coordinate with any intent + * chains involving this inode. + */ + if (sc->ip) + return 0; + + /* + * Decide if this AG is quiet enough for all metadata to be + * consistent with each other. XFS allows the AG header buffer + * locks to cycle across transaction rolls while processing + * chains of deferred ops, which means that there could be + * other threads in the middle of processing a chain of + * deferred ops. For regular operations we are careful about + * ordering operations to prevent collisions between threads + * (which is why we don't need a per-AG lock), but scrub and + * repair have to serialize against chained operations. + * + * We just locked all the AG headers buffers; now take a look + * to see if there are any intents in progress. If there are, + * drop the AG headers and wait for the intents to drain. + * Since we hold all the AG header locks for the duration of + * the scrub, this is the only time we have to sample the + * intents counter; any threads increasing it after this point + * can't possibly be in the middle of a chain of AG metadata + * updates. + * + * Obviously, this should be slanted against scrub and in favor + * of runtime threads. + */ + if (!xfs_perag_intent_busy(sa->pag)) + return 0; + + if (sa->agf_bp) { + xfs_trans_brelse(sc->tp, sa->agf_bp); + sa->agf_bp = NULL; + } + + if (sa->agi_bp) { + xfs_trans_brelse(sc->tp, sa->agi_bp); + sa->agi_bp = NULL; + } + + error = xfs_perag_intent_drain(sa->pag); + if (error == -ERESTARTSYS) + error = -EINTR; + } while (!error); + + return error; +} + +/* + * Grab the per-AG structure, grab all AG header buffers, and wait until there + * aren't any pending intents. Returns -ENOENT if we can't grab the perag + * structure. + */ +int +xchk_ag_read_headers( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + struct xchk_ag *sa) +{ + struct xfs_mount *mp = sc->mp; + + ASSERT(!sa->pag); + sa->pag = xfs_perag_get(mp, agno); + if (!sa->pag) + return -ENOENT; + + return xchk_perag_drain_and_lock(sc); +} + /* Release all the AG btree cursors. */ void xchk_ag_btcur_free( diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 66e99b0f6049..d2b2a1cb6533 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -7,6 +7,8 @@ #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" #include "xfs_btree.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index aaad13b1871f..756066f3dea2 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -7,6 +7,8 @@ #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" #include "xfs_btree.h" #include "xfs_rmap.h" #include "xfs_refcount.h" diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 8f0f33d07d2c..7551c3ec4ea5 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -373,7 +373,15 @@ xfs_bmap_update_get_group( xfs_agnumber_t agno; agno = XFS_FSB_TO_AGNO(mp, bi->bi_bmap.br_startblock); - bi->bi_pag = xfs_perag_get(mp, agno); + + /* + * Bump the intent count on behalf of the deferred rmap and refcount + * intent items that that we can queue when we finish this bmap work. + * This new intent item will bump the intent count before the bmap + * intent drops the intent count, ensuring that the intent count + * remains nonzero across the transaction roll. + */ + bi->bi_pag = xfs_perag_intent_get(mp, agno); } /* Release a passive AG ref after finishing mapping work. */ @@ -381,7 +389,7 @@ static inline void xfs_bmap_update_put_group( struct xfs_bmap_intent *bi) { - xfs_perag_put(bi->bi_pag); + xfs_perag_intent_put(bi->bi_pag); } /* Process a deferred rmap update. */ diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c new file mode 100644 index 000000000000..b431abdf0af1 --- /dev/null +++ b/fs/xfs/xfs_drain.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_ag.h" +#include "xfs_trace.h" + +void +xfs_defer_drain_init( + struct xfs_defer_drain *dr) +{ + atomic_set(&dr->dr_count, 0); + init_waitqueue_head(&dr->dr_waiters); +} + +void +xfs_defer_drain_free(struct xfs_defer_drain *dr) +{ + ASSERT(atomic_read(&dr->dr_count) == 0); +} + +/* Increase the pending intent count. */ +static inline void xfs_defer_drain_grab(struct xfs_defer_drain *dr) +{ + atomic_inc(&dr->dr_count); +} + +static inline bool has_waiters(struct wait_queue_head *wq_head) +{ + /* + * This memory barrier is paired with the one in set_current_state on + * the waiting side. + */ + smp_mb__after_atomic(); + return waitqueue_active(wq_head); +} + +/* Decrease the pending intent count, and wake any waiters, if appropriate. */ +static inline void xfs_defer_drain_rele(struct xfs_defer_drain *dr) +{ + if (atomic_dec_and_test(&dr->dr_count) && + has_waiters(&dr->dr_waiters)) + wake_up(&dr->dr_waiters); +} + +/* Are there intents pending? */ +static inline bool xfs_defer_drain_busy(struct xfs_defer_drain *dr) +{ + return atomic_read(&dr->dr_count) > 0; +} + +/* + * Wait for the pending intent count for a drain to hit zero. + * + * Callers must not hold any locks that would prevent intents from being + * finished. + */ +static inline int xfs_defer_drain_wait(struct xfs_defer_drain *dr) +{ + return wait_event_killable(dr->dr_waiters, !xfs_defer_drain_busy(dr)); +} + +/* + * Get a passive reference to an AG and declare an intent to update its + * metadata. + */ +struct xfs_perag * +xfs_perag_intent_get( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + if (!pag) + return NULL; + + xfs_perag_intent_hold(pag); + return pag; +} + +/* + * Release our intent to update this AG's metadata, and then release our + * passive ref to the AG. + */ +void +xfs_perag_intent_put( + struct xfs_perag *pag) +{ + xfs_perag_intent_rele(pag); + xfs_perag_put(pag); +} + +/* + * Declare an intent to update AG metadata. Other threads that need exclusive + * access can decide to back off if they see declared intentions. + */ +void +xfs_perag_intent_hold( + struct xfs_perag *pag) +{ + trace_xfs_perag_intent_hold(pag, __return_address); + xfs_defer_drain_grab(&pag->pag_intents_drain); +} + +/* Release our intent to update this AG's metadata. */ +void +xfs_perag_intent_rele( + struct xfs_perag *pag) +{ + trace_xfs_perag_intent_rele(pag, __return_address); + xfs_defer_drain_rele(&pag->pag_intents_drain); +} + +/* + * Wait for the intent update count for this AG to hit zero. + * Callers must not hold any AG header buffers. + */ +int +xfs_perag_intent_drain( + struct xfs_perag *pag) +{ + trace_xfs_perag_wait_intents(pag, __return_address); + return xfs_defer_drain_wait(&pag->pag_intents_drain); +} + +/* Has anyone declared an intent to update this AG? */ +bool +xfs_perag_intent_busy( + struct xfs_perag *pag) +{ + return xfs_defer_drain_busy(&pag->pag_intents_drain); +} diff --git a/fs/xfs/xfs_drain.h b/fs/xfs/xfs_drain.h new file mode 100644 index 000000000000..9b16df3cc7dc --- /dev/null +++ b/fs/xfs/xfs_drain.h @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef XFS_DRAIN_H_ +#define XFS_DRAIN_H_ + +struct xfs_perag; + +#ifdef CONFIG_XFS_DRAIN_INTENTS +/* + * Passive drain mechanism. This data structure tracks a count of some items + * and contains a waitqueue for callers who would like to wake up when the + * count hits zero. + */ +struct xfs_defer_drain { + /* Number of items pending in some part of the filesystem. */ + atomic_t dr_count; + + /* Queue to wait for dri_count to go to zero */ + struct wait_queue_head dr_waiters; +}; + +void xfs_defer_drain_init(struct xfs_defer_drain *dr); +void xfs_defer_drain_free(struct xfs_defer_drain *dr); + +/* + * Deferred Work Intent Drains + * =========================== + * + * When a writer thread executes a chain of log intent items, the AG header + * buffer locks will cycle during a transaction roll to get from one intent + * item to the next in a chain. Although scrub takes all AG header buffer + * locks, this isn't sufficient to guard against scrub checking an AG while + * that writer thread is in the middle of finishing a chain because there's no + * higher level locking primitive guarding allocation groups. + * + * When there's a collision, cross-referencing between data structures (e.g. + * rmapbt and refcountbt) yields false corruption events; if repair is running, + * this results in incorrect repairs, which is catastrophic. + * + * The solution is to the perag structure the count of active intents and make + * scrub wait until it has both AG header buffer locks and the intent counter + * reaches zero. It is therefore critical that deferred work threads hold the + * AGI or AGF buffers when decrementing the intent counter. + * + * Given a list of deferred work items, the deferred work manager will complete + * a work item and all the sub-items that the parent item creates before moving + * on to the next work item in the list. This is also true for all levels of + * sub-items. Writer threads are permitted to queue multiple work items + * targetting the same AG, so a deferred work item (such as a BUI) that creates + * sub-items (such as RUIs) must bump the intent counter and maintain it until + * the sub-items can themselves bump the intent counter. + * + * Therefore, the intent count tracks entire lifetimes of deferred work items. + * All functions that create work items must increment the intent counter as + * soon as the item is added to the transaction and cannot drop the counter + * until the item is finished or cancelled. + */ +struct xfs_perag *xfs_perag_intent_get(struct xfs_mount *mp, + xfs_agnumber_t agno); +void xfs_perag_intent_put(struct xfs_perag *pag); + +void xfs_perag_intent_hold(struct xfs_perag *pag); +void xfs_perag_intent_rele(struct xfs_perag *pag); + +int xfs_perag_intent_drain(struct xfs_perag *pag); +bool xfs_perag_intent_busy(struct xfs_perag *pag); +#else +struct xfs_defer_drain { /* empty */ }; + +#define xfs_defer_drain_free(dr) ((void)0) +#define xfs_defer_drain_init(dr) ((void)0) + +#define xfs_perag_intent_get(mp, agno) xfs_perag_get((mp), (agno)) +#define xfs_perag_intent_put(pag) xfs_perag_put(pag) + +static inline void xfs_perag_intent_hold(struct xfs_perag *pag) { } +static inline void xfs_perag_intent_rele(struct xfs_perag *pag) { } + +#endif /* CONFIG_XFS_DRAIN_INTENTS */ + +#endif /* XFS_DRAIN_H_ */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 38b66fcfddc8..f9e36b810663 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -469,7 +469,7 @@ xfs_extent_free_get_group( xfs_agnumber_t agno; agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock); - xefi->xefi_pag = xfs_perag_get(mp, agno); + xefi->xefi_pag = xfs_perag_intent_get(mp, agno); } /* Release a passive AG ref after some freeing work. */ @@ -477,7 +477,7 @@ static inline void xfs_extent_free_put_group( struct xfs_extent_free_item *xefi) { - xfs_perag_put(xefi->xefi_pag); + xfs_perag_intent_put(xefi->xefi_pag); } /* Process a free extent. */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index e88f18f85e4b..74dcb05069e8 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -80,6 +80,7 @@ typedef __u32 xfs_nlink_t; #include "xfs_cksum.h" #include "xfs_buf.h" #include "xfs_message.h" +#include "xfs_drain.h" #ifdef __BIG_ENDIAN #define XFS_NATIVE_HOST 1 diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7edee9590ed6..edd8587658d5 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -374,7 +374,7 @@ xfs_refcount_update_get_group( xfs_agnumber_t agno; agno = XFS_FSB_TO_AGNO(mp, ri->ri_startblock); - ri->ri_pag = xfs_perag_get(mp, agno); + ri->ri_pag = xfs_perag_intent_get(mp, agno); } /* Release a passive AG ref after finishing refcounting work. */ @@ -382,7 +382,7 @@ static inline void xfs_refcount_update_put_group( struct xfs_refcount_intent *ri) { - xfs_perag_put(ri->ri_pag); + xfs_perag_intent_put(ri->ri_pag); } /* Process a deferred refcount update. */ diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 739ddbd04a17..520c7ebdfed8 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -399,7 +399,7 @@ xfs_rmap_update_get_group( xfs_agnumber_t agno; agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock); - ri->ri_pag = xfs_perag_get(mp, agno); + ri->ri_pag = xfs_perag_intent_get(mp, agno); } /* Release a passive AG ref after finishing rmapping work. */ @@ -407,7 +407,7 @@ static inline void xfs_rmap_update_put_group( struct xfs_rmap_intent *ri) { - xfs_perag_put(ri->ri_pag); + xfs_perag_intent_put(ri->ri_pag); } /* Process a deferred rmap update. */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index db09bb771765..cd4ca5b1fcb0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2687,6 +2687,44 @@ DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred); DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_agfl_free_defer); DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_agfl_free_deferred); +DECLARE_EVENT_CLASS(xfs_defer_pending_item_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, + void *item), + TP_ARGS(mp, dfp, item), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(void *, intent) + __field(void *, item) + __field(char, committed) + __field(int, nr) + ), + TP_fast_assign( + __entry->dev = mp ? mp->m_super->s_dev : 0; + __entry->type = dfp->dfp_type; + __entry->intent = dfp->dfp_intent; + __entry->item = item; + __entry->committed = dfp->dfp_done != NULL; + __entry->nr = dfp->dfp_count; + ), + TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->intent, + __entry->item, + __entry->committed, + __entry->nr) +) +#define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \ +DEFINE_EVENT(xfs_defer_pending_item_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, \ + void *item), \ + TP_ARGS(mp, dfp, item)) + +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_add_item); +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_cancel_item); +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item); + /* rmap tracepoints */ DECLARE_EVENT_CLASS(xfs_rmap_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -4326,6 +4364,39 @@ TRACE_EVENT(xfs_force_shutdown, __entry->line_num) ); +#ifdef CONFIG_XFS_DRAIN_INTENTS +DECLARE_EVENT_CLASS(xfs_perag_intents_class, + TP_PROTO(struct xfs_perag *pag, void *caller_ip), + TP_ARGS(pag, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(long, nr_intents) + __field(void *, caller_ip) + ), + TP_fast_assign( + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->agno = pag->pag_agno; + __entry->nr_intents = atomic_read(&pag->pag_intents_drain.dr_count); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->nr_intents, + __entry->caller_ip) +); + +#define DEFINE_PERAG_INTENTS_EVENT(name) \ +DEFINE_EVENT(xfs_perag_intents_class, name, \ + TP_PROTO(struct xfs_perag *pag, void *caller_ip), \ + TP_ARGS(pag, caller_ip)) +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_hold); +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_rele); +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents); + +#endif /* CONFIG_XFS_DRAIN_INTENTS */ + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH -- cgit From 8e698ee72c4ecbbf18264568eb310875839fd601 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 2 May 2023 09:14:36 +1000 Subject: xfs: set bnobt/cntbt numrecs correctly when formatting new AGs Through generic/300, I discovered that mkfs.xfs creates corrupt filesystems when given these parameters: # mkfs.xfs -d size=512M /dev/sda -f -d su=128k,sw=4 --unsupported Filesystems formatted with --unsupported are not supported!! meta-data=/dev/sda isize=512 agcount=8, agsize=16352 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=1 = reflink=1 bigtime=1 inobtcount=1 nrext64=1 data = bsize=4096 blocks=130816, imaxpct=25 = sunit=32 swidth=128 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=8192, version=2 = sectsz=512 sunit=32 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 = rgcount=0 rgsize=0 blks Discarding blocks...Done. # xfs_repair -n /dev/sda Phase 1 - find and verify superblock... - reporting progress in intervals of 15 minutes Phase 2 - using internal log - zero log... - 16:30:50: zeroing log - 16320 of 16320 blocks done - scan filesystem freespace and inode maps... agf_freeblks 25, counted 0 in ag 4 sb_fdblocks 8823, counted 8798 The root cause of this problem is the numrecs handling in xfs_freesp_init_recs, which is used to initialize a new AG. Prior to calling the function, we set up the new bnobt block with numrecs == 1 and rely on _freesp_init_recs to format that new record. If the last record created has a blockcount of zero, then it sets numrecs = 0. That last bit isn't correct if the AG contains the log, the start of the log is not immediately after the initial blocks due to stripe alignment, and the end of the log is perfectly aligned with the end of the AG. For this case, we actually formatted a single bnobt record to handle the free space before the start of the (stripe aligned) log, and incremented arec to try to format a second record. That second record turned out to be unnecessary, so what we really want is to leave numrecs at 1. The numrecs handling itself is overly complicated because a different function sets numrecs == 1. Change the bnobt creation code to start with numrecs set to zero and only increment it after successfully formatting a free space extent into the btree block. Fixes: f327a00745ff ("xfs: account for log space when formatting new AGs") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs/xfs/libxfs/xfs_ag.c') diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 1b078bbbf225..9b373a0c7aaf 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -495,10 +495,12 @@ xfs_freesp_init_recs( ASSERT(start >= mp->m_ag_prealloc_blocks); if (start != mp->m_ag_prealloc_blocks) { /* - * Modify first record to pad stripe align of log + * Modify first record to pad stripe align of log and + * bump the record count. */ arec->ar_blockcount = cpu_to_be32(start - mp->m_ag_prealloc_blocks); + be16_add_cpu(&block->bb_numrecs, 1); nrec = arec + 1; /* @@ -509,7 +511,6 @@ xfs_freesp_init_recs( be32_to_cpu(arec->ar_startblock) + be32_to_cpu(arec->ar_blockcount)); arec = nrec; - be16_add_cpu(&block->bb_numrecs, 1); } /* * Change record start to after the internal log @@ -518,15 +519,13 @@ xfs_freesp_init_recs( } /* - * Calculate the record block count and check for the case where - * the log might have consumed all available space in the AG. If - * so, reset the record count to 0 to avoid exposure of an invalid - * record start block. + * Calculate the block count of this record; if it is nonzero, + * increment the record count. */ arec->ar_blockcount = cpu_to_be32(id->agsize - be32_to_cpu(arec->ar_startblock)); - if (!arec->ar_blockcount) - block->bb_numrecs = 0; + if (arec->ar_blockcount) + be16_add_cpu(&block->bb_numrecs, 1); } /* @@ -538,7 +537,7 @@ xfs_bnoroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno); xfs_freesp_init_recs(mp, bp, id); } @@ -548,7 +547,7 @@ xfs_cntroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno); xfs_freesp_init_recs(mp, bp, id); } -- cgit From 7dfee17b13e5024c5c0ab1911859ded4182de3e5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 14:48:15 +1000 Subject: xfs: validate block number being freed before adding to xefi Bad things happen in defered extent freeing operations if it is passed a bad block number in the xefi. This can come from a bogus agno/agbno pair from deferred agfl freeing, or just a bad fsbno being passed to __xfs_free_extent_later(). Either way, it's very difficult to diagnose where a null perag oops in EFI creation is coming from when the operation that queued the xefi has already been completed and there's no longer any trace of it around.... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.c | 5 ++++- fs/xfs/libxfs/xfs_alloc.c | 16 +++++++++++++--- fs/xfs/libxfs/xfs_alloc.h | 6 +++--- fs/xfs/libxfs/xfs_bmap.c | 10 ++++++++-- fs/xfs/libxfs/xfs_bmap_btree.c | 7 +++++-- fs/xfs/libxfs/xfs_ialloc.c | 24 ++++++++++++++++-------- fs/xfs/libxfs/xfs_refcount.c | 13 ++++++++++--- fs/xfs/xfs_reflink.c | 4 +++- 8 files changed, 62 insertions(+), 23 deletions(-) (limited to 'fs/xfs/libxfs/xfs_ag.c') diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 9b373a0c7aaf..ee84835ebc66 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -984,7 +984,10 @@ xfs_ag_shrink_space( if (err2 != -ENOSPC) goto resv_err; - __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true); + err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, + true); + if (err2) + goto resv_err; /* * Roll the transaction before trying to re-init the per-ag diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 643d17877832..c20fe99405d8 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2431,7 +2431,7 @@ xfs_agfl_reset( * the real allocation can proceed. Deferring the free disconnects freeing up * the AGFL slot from freeing the block. */ -STATIC void +static int xfs_defer_agfl_block( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -2450,17 +2450,21 @@ xfs_defer_agfl_block( xefi->xefi_blockcount = 1; xefi->xefi_owner = oinfo->oi_owner; + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) + return -EFSCORRUPTED; + trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_extent_free_get_group(mp, xefi); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); + return 0; } /* * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). */ -void +int __xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, @@ -2487,6 +2491,9 @@ __xfs_free_extent_later( #endif ASSERT(xfs_extfree_item_cache != NULL); + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) + return -EFSCORRUPTED; + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = bno; @@ -2510,6 +2517,7 @@ __xfs_free_extent_later( xfs_extent_free_get_group(mp, xefi); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); + return 0; } #ifdef DEBUG @@ -2670,7 +2678,9 @@ xfs_alloc_fix_freelist( goto out_agbp_relse; /* defer agfl frees */ - xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + if (error) + goto out_agbp_relse; } targs.tp = tp; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 5dbb25546d0b..85ac470be0da 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -230,7 +230,7 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } -void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, +int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, bool skip_discard); @@ -254,14 +254,14 @@ void xfs_extent_free_get_group(struct xfs_mount *mp, #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ -static inline void +static inline int xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo) { - __xfs_free_extent_later(tp, bno, len, oinfo, false); + return __xfs_free_extent_later(tp, bno, len, oinfo, false); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index cd8870a16fd1..fef35696adb7 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -572,8 +572,12 @@ xfs_bmap_btree_to_extents( cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + if (error) + return error; + ip->i_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); @@ -5230,10 +5234,12 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - __xfs_free_extent_later(tp, del->br_startblock, + error = __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, (bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN); + if (error) + goto done; } } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 1b40e5f8b1ec..36564ae3084f 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -268,11 +268,14 @@ xfs_bmbt_free_block( struct xfs_trans *tp = cur->bc_tp; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); struct xfs_owner_info oinfo; + int error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); - ip->i_nblocks--; + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); + if (error) + return error; + ip->i_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index a16d5de16933..34600f94c2f4 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1834,7 +1834,7 @@ retry: * might be sparse and only free the regions that are allocated as part of the * chunk. */ -STATIC void +static int xfs_difree_inode_chunk( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -1851,10 +1851,10 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - M_IGEO(mp)->ialloc_blks, - &XFS_RMAP_OINFO_INODES); - return; + return xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, sagbno), + M_IGEO(mp)->ialloc_blks, + &XFS_RMAP_OINFO_INODES); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -1871,6 +1871,8 @@ xfs_difree_inode_chunk( XFS_INOBT_HOLEMASK_BITS); nextbit = startidx + 1; while (startidx < XFS_INOBT_HOLEMASK_BITS) { + int error; + nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS, nextbit); /* @@ -1896,8 +1898,11 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk, &XFS_RMAP_OINFO_INODES); + error = xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, agbno), + contigblk, &XFS_RMAP_OINFO_INODES); + if (error) + return error; /* reset range to current bit and carry on... */ startidx = endidx = nextbit; @@ -1905,6 +1910,7 @@ xfs_difree_inode_chunk( next: nextbit++; } + return 0; } STATIC int @@ -2003,7 +2009,9 @@ xfs_difree_inobt( goto error0; } - xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + if (error) + goto error0; } else { xic->deleted = false; diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index c1c65774dcc2..b6e21433925c 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1151,8 +1151,10 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, tmp.rc_startblock); - xfs_free_extent_later(cur->bc_tp, fsbno, + error = xfs_free_extent_later(cur->bc_tp, fsbno, tmp.rc_blockcount, NULL); + if (error) + goto out_error; } (*agbno) += tmp.rc_blockcount; @@ -1210,8 +1212,10 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, ext.rc_startblock); - xfs_free_extent_later(cur->bc_tp, fsbno, + error = xfs_free_extent_later(cur->bc_tp, fsbno, ext.rc_blockcount, NULL); + if (error) + goto out_error; } skip: @@ -1976,7 +1980,10 @@ xfs_refcount_recover_cow_leftovers( rr->rr_rrec.rc_blockcount); /* Free the block. */ - xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); + error = xfs_free_extent_later(tp, fsb, + rr->rr_rrec.rc_blockcount, NULL); + if (error) + goto out_trans; error = xfs_trans_commit(tp); if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index f5dc46ce9803..abcc559f3c64 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -616,8 +616,10 @@ xfs_reflink_cancel_cow_blocks( xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); - xfs_free_extent_later(*tpp, del.br_startblock, + error = xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL); + if (error) + break; /* Roll the transaction */ error = xfs_defer_finish(tpp); -- cgit