aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs/xfs_alloc.c')
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c244
1 files changed, 179 insertions, 65 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6a037173d20d..c20fe99405d8 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -233,6 +233,52 @@ xfs_alloc_update(
return xfs_btree_update(cur, &rec);
}
+/* Convert the ondisk btree record to its incore representation. */
+void
+xfs_alloc_btrec_to_irec(
+ const union xfs_btree_rec *rec,
+ struct xfs_alloc_rec_incore *irec)
+{
+ irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
+ irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
+}
+
+/* Simple checks for free space records. */
+xfs_failaddr_t
+xfs_alloc_check_irec(
+ struct xfs_btree_cur *cur,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+ if (irec->ar_blockcount == 0)
+ return __this_address;
+
+ /* check for valid extent range, including overflow */
+ if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
+ return __this_address;
+
+ return NULL;
+}
+
+static inline int
+xfs_alloc_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+
+ xfs_warn(mp,
+ "%s Freespace BTree record corruption in AG %d detected at %pS!",
+ cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
+ cur->bc_ag.pag->pag_agno, fa);
+ xfs_warn(mp,
+ "start block 0x%x block count 0x%x", irec->ar_startblock,
+ irec->ar_blockcount);
+ return -EFSCORRUPTED;
+}
+
/*
* Get the data from the pointed-to record.
*/
@@ -243,35 +289,23 @@ xfs_alloc_get_rec(
xfs_extlen_t *len, /* output: length of extent */
int *stat) /* output: success/failure */
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_alloc_rec_incore irec;
union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
int error;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !(*stat))
return error;
- *bno = be32_to_cpu(rec->alloc.ar_startblock);
- *len = be32_to_cpu(rec->alloc.ar_blockcount);
-
- if (*len == 0)
- goto out_bad_rec;
-
- /* check for valid extent range, including overflow */
- if (!xfs_verify_agbext(pag, *bno, *len))
- goto out_bad_rec;
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
+ *bno = irec.ar_startblock;
+ *len = irec.ar_blockcount;
return 0;
-
-out_bad_rec:
- xfs_warn(mp,
- "%s Freespace BTree record corruption in AG %d detected!",
- cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
- pag->pag_agno);
- xfs_warn(mp,
- "start block 0x%x block count 0x%x", *bno, *len);
- return -EFSCORRUPTED;
}
/*
@@ -594,6 +628,25 @@ xfs_alloc_fixup_trees(
return 0;
}
+/*
+ * We do not verify the AGFL contents against AGF-based index counters here,
+ * even though we may have access to the perag that contains shadow copies. We
+ * don't know if the AGF based counters have been checked, and if they have they
+ * still may be inconsistent because they haven't yet been reset on the first
+ * allocation after the AGF has been read in.
+ *
+ * This means we can only check that all agfl entries contain valid or null
+ * values because we can't reliably determine the active range to exclude
+ * NULLAGBNO as a valid value.
+ *
+ * However, we can't even do that for v4 format filesystems because there are
+ * old versions of mkfs out there that does not initialise the AGFL to known,
+ * verifiable values. HEnce we can't tell the difference between a AGFL block
+ * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
+ *
+ * As a result, we can only fully validate AGFL block numbers when we pull them
+ * from the freelist in xfs_alloc_get_freelist().
+ */
static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
@@ -603,12 +656,6 @@ xfs_agfl_verify(
__be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
int i;
- /*
- * There is no verification of non-crc AGFLs because mkfs does not
- * initialise the AGFL to zero or NULL. Hence the only valid part of the
- * AGFL is what the AGF says is active. We can't get to the AGF, so we
- * can't verify just those entries are valid.
- */
if (!xfs_has_crc(mp))
return NULL;
@@ -2287,12 +2334,16 @@ xfs_free_agfl_block(
}
/*
- * Check the agfl fields of the agf for inconsistency or corruption. The purpose
- * is to detect an agfl header padding mismatch between current and early v5
- * kernels. This problem manifests as a 1-slot size difference between the
- * on-disk flcount and the active [first, last] range of a wrapped agfl. This
- * may also catch variants of agfl count corruption unrelated to padding. Either
- * way, we'll reset the agfl and warn the user.
+ * Check the agfl fields of the agf for inconsistency or corruption.
+ *
+ * The original purpose was to detect an agfl header padding mismatch between
+ * current and early v5 kernels. This problem manifests as a 1-slot size
+ * difference between the on-disk flcount and the active [first, last] range of
+ * a wrapped agfl.
+ *
+ * However, we need to use these same checks to catch agfl count corruptions
+ * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
+ * way, we need to reset the agfl and warn the user.
*
* Return true if a reset is required before the agfl can be used, false
* otherwise.
@@ -2308,10 +2359,6 @@ xfs_agfl_needs_reset(
int agfl_size = xfs_agfl_size(mp);
int active;
- /* no agfl header on v4 supers */
- if (!xfs_has_crc(mp))
- return false;
-
/*
* The agf read verifier catches severe corruption of these fields.
* Repeat some sanity checks to cover a packed -> unpacked mismatch if
@@ -2384,7 +2431,7 @@ xfs_agfl_reset(
* the real allocation can proceed. Deferring the free disconnects freeing up
* the AGFL slot from freeing the block.
*/
-STATIC void
+static int
xfs_defer_agfl_block(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -2403,16 +2450,21 @@ xfs_defer_agfl_block(
xefi->xefi_blockcount = 1;
xefi->xefi_owner = oinfo->oi_owner;
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
+ return -EFSCORRUPTED;
+
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+ xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
+ return 0;
}
/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
-void
+int
__xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
@@ -2421,8 +2473,8 @@ __xfs_free_extent_later(
bool skip_discard)
{
struct xfs_extent_free_item *xefi;
-#ifdef DEBUG
struct xfs_mount *mp = tp->t_mountp;
+#ifdef DEBUG
xfs_agnumber_t agno;
xfs_agblock_t agbno;
@@ -2439,6 +2491,9 @@ __xfs_free_extent_later(
#endif
ASSERT(xfs_extfree_item_cache != NULL);
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
xefi->xefi_startblock = bno;
@@ -2456,10 +2511,13 @@ __xfs_free_extent_later(
} else {
xefi->xefi_owner = XFS_RMAP_OWN_NULL;
}
- trace_xfs_bmap_free_defer(tp->t_mountp,
+ trace_xfs_bmap_free_defer(mp,
XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+
+ xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+ return 0;
}
#ifdef DEBUG
@@ -2620,7 +2678,9 @@ xfs_alloc_fix_freelist(
goto out_agbp_relse;
/* defer agfl frees */
- xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ if (error)
+ goto out_agbp_relse;
}
targs.tp = tp;
@@ -2730,6 +2790,9 @@ xfs_alloc_get_freelist(
*/
agfl_bno = xfs_buf_to_agfl_bno(agflbp);
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
+ if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
+ return -EFSCORRUPTED;
+
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
@@ -2852,6 +2915,19 @@ xfs_alloc_put_freelist(
return 0;
}
+/*
+ * Verify the AGF is consistent.
+ *
+ * We do not verify the AGFL indexes in the AGF are fully consistent here
+ * because of issues with variable on-disk structure sizes. Instead, we check
+ * the agfl indexes for consistency when we initialise the perag from the AGF
+ * information after a read completes.
+ *
+ * If the index is inconsistent, then we mark the perag as needing an AGFL
+ * reset. The first AGFL update performed then resets the AGFL indexes and
+ * refills the AGFL with known good free blocks, allowing the filesystem to
+ * continue operating normally at the cost of a few leaked free space blocks.
+ */
static xfs_failaddr_t
xfs_agf_verify(
struct xfs_buf *bp)
@@ -2925,7 +3001,6 @@ xfs_agf_verify(
return __this_address;
return NULL;
-
}
static void
@@ -3045,6 +3120,8 @@ xfs_alloc_read_agf(
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
if (xfs_agfl_needs_reset(pag->pag_mount, agf))
set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
+ else
+ clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
/*
* Update the in-core allocbt counter. Filter out the rmapbt
@@ -3148,7 +3225,8 @@ xfs_alloc_vextent_check_args(
*/
static int
xfs_alloc_vextent_prepare_ag(
- struct xfs_alloc_arg *args)
+ struct xfs_alloc_arg *args,
+ uint32_t flags)
{
bool need_pag = !args->pag;
int error;
@@ -3157,7 +3235,7 @@ xfs_alloc_vextent_prepare_ag(
args->pag = xfs_perag_get(args->mp, args->agno);
args->agbp = NULL;
- error = xfs_alloc_fix_freelist(args, 0);
+ error = xfs_alloc_fix_freelist(args, flags);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
if (need_pag)
@@ -3255,6 +3333,8 @@ xfs_alloc_vextent_finish(
XFS_STATS_INC(mp, xs_allocx);
XFS_STATS_ADD(mp, xs_allocb, args->len);
+ trace_xfs_alloc_vextent_finish(args);
+
out_drop_perag:
if (drop_perag && args->pag) {
xfs_perag_rele(args->pag);
@@ -3279,8 +3359,14 @@ xfs_alloc_vextent_this_ag(
xfs_agnumber_t minimum_agno;
int error;
+ ASSERT(args->pag != NULL);
+ ASSERT(args->pag->pag_agno == agno);
+
args->agno = agno;
args->agbno = 0;
+
+ trace_xfs_alloc_vextent_this_ag(args);
+
error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
&minimum_agno);
if (error) {
@@ -3289,7 +3375,7 @@ xfs_alloc_vextent_this_ag(
return error;
}
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_size(args);
@@ -3323,14 +3409,17 @@ xfs_alloc_vextent_iterate_ags(
uint32_t flags)
{
struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t restart_agno = minimum_agno;
xfs_agnumber_t agno;
int error = 0;
+ if (flags & XFS_ALLOC_FLAG_TRYLOCK)
+ restart_agno = 0;
restart:
- for_each_perag_wrap_range(mp, start_agno, minimum_agno,
+ for_each_perag_wrap_range(mp, start_agno, restart_agno,
mp->m_sb.sb_agcount, agno, args->pag) {
args->agno = agno;
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, flags);
if (error)
break;
if (!args->agbp) {
@@ -3366,6 +3455,7 @@ restart:
*/
if (flags) {
flags = 0;
+ restart_agno = minimum_agno;
goto restart;
}
@@ -3394,8 +3484,13 @@ xfs_alloc_vextent_start_ag(
bool bump_rotor = false;
int error;
+ ASSERT(args->pag == NULL);
+
args->agno = NULLAGNUMBER;
args->agbno = NULLAGBLOCK;
+
+ trace_xfs_alloc_vextent_start_ag(args);
+
error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
@@ -3442,8 +3537,13 @@ xfs_alloc_vextent_first_ag(
xfs_agnumber_t start_agno;
int error;
+ ASSERT(args->pag == NULL);
+
args->agno = NULLAGNUMBER;
args->agbno = NULLAGBLOCK;
+
+ trace_xfs_alloc_vextent_first_ag(args);
+
error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
@@ -3470,8 +3570,14 @@ xfs_alloc_vextent_exact_bno(
xfs_agnumber_t minimum_agno;
int error;
+ ASSERT(args->pag != NULL);
+ ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+
args->agno = XFS_FSB_TO_AGNO(mp, target);
args->agbno = XFS_FSB_TO_AGBNO(mp, target);
+
+ trace_xfs_alloc_vextent_exact_bno(args);
+
error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
@@ -3479,7 +3585,7 @@ xfs_alloc_vextent_exact_bno(
return error;
}
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_exact(args);
@@ -3502,8 +3608,14 @@ xfs_alloc_vextent_near_bno(
bool needs_perag = args->pag == NULL;
int error;
+ if (!needs_perag)
+ ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+
args->agno = XFS_FSB_TO_AGNO(mp, target);
args->agbno = XFS_FSB_TO_AGBNO(mp, target);
+
+ trace_xfs_alloc_vextent_near_bno(args);
+
error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
@@ -3514,7 +3626,7 @@ xfs_alloc_vextent_near_bno(
if (needs_perag)
args->pag = xfs_perag_grab(mp, args->agno);
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_near(args);
@@ -3560,7 +3672,8 @@ xfs_free_extent_fix_freelist(
int
__xfs_free_extent(
struct xfs_trans *tp,
- xfs_fsblock_t bno,
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
@@ -3568,12 +3681,9 @@ __xfs_free_extent(
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *agbp;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
struct xfs_agf *agf;
int error;
unsigned int busy_flags = 0;
- struct xfs_perag *pag;
ASSERT(len != 0);
ASSERT(type != XFS_AG_RESV_AGFL);
@@ -3582,10 +3692,9 @@ __xfs_free_extent(
XFS_ERRTAG_FREE_EXTENT))
return -EIO;
- pag = xfs_perag_get(mp, agno);
error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
if (error)
- goto err;
+ return error;
agf = agbp->b_addr;
if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
@@ -3599,20 +3708,18 @@ __xfs_free_extent(
goto err_release;
}
- error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
+ error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
+ type);
if (error)
goto err_release;
if (skip_discard)
busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
- xfs_perag_put(pag);
return 0;
err_release:
xfs_trans_brelse(tp, agbp);
-err:
- xfs_perag_put(pag);
return error;
}
@@ -3630,9 +3737,13 @@ xfs_alloc_query_range_helper(
{
struct xfs_alloc_query_range_info *query = priv;
struct xfs_alloc_rec_incore irec;
+ xfs_failaddr_t fa;
+
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
- irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
- irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
return query->fn(cur, &irec, query->priv);
}
@@ -3673,13 +3784,16 @@ xfs_alloc_query_all(
return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}
-/* Is there a record covering a given extent? */
+/*
+ * Scan part of the keyspace of the free space and tell us if the area has no
+ * records, is fully mapped by records, or is partially filled.
+ */
int
-xfs_alloc_has_record(
+xfs_alloc_has_records(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
union xfs_btree_irec low;
union xfs_btree_irec high;
@@ -3689,7 +3803,7 @@ xfs_alloc_has_record(
memset(&high, 0xFF, sizeof(high));
high.a.ar_startblock = bno + len - 1;
- return xfs_btree_has_record(cur, &low, &high, exists);
+ return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}
/*