diff options
Diffstat (limited to 'fs/xfs')
134 files changed, 5245 insertions, 3439 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 35faf128f36d..1b98cfa342ab 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -96,3 +96,16 @@ config XFS_DEBUG not useful unless you are debugging a particular problem. Say N unless you are an XFS developer, or you play one on TV. + +config XFS_ASSERT_FATAL + bool "XFS fatal asserts" + default y + depends on XFS_FS && XFS_DEBUG + help + Set the default DEBUG mode ASSERT failure behavior. + + Say Y here to cause DEBUG mode ASSERT failures to result in fatal + errors that BUG() the kernel by default. If you say N, ASSERT failures + result in warnings. + + This behavior can be modified at runtime via sysfs. diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 26ef1958b65b..a6e955bfead8 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -79,6 +79,7 @@ xfs-y += xfs_aops.o \ xfs_extent_busy.o \ xfs_file.o \ xfs_filestream.o \ + xfs_fsmap.o \ xfs_fsops.o \ xfs_globals.o \ xfs_icache.o \ @@ -97,8 +98,7 @@ xfs-y += xfs_aops.o \ xfs_sysfs.o \ xfs_trans.o \ xfs_xattr.o \ - kmem.o \ - uuid.o + kmem.o # low-level transaction/log code xfs-y += xfs_log.o \ diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 339c696bbc01..393b6849aeb3 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -16,6 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include <linux/mm.h> +#include <linux/sched/mm.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/swap.h> @@ -24,24 +25,6 @@ #include "kmem.h" #include "xfs_message.h" -/* - * Greedy allocation. May fail and may return vmalloced memory. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = vzalloc(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - void * kmem_alloc(size_t size, xfs_km_flags_t flags) { @@ -65,7 +48,7 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) void * kmem_zalloc_large(size_t size, xfs_km_flags_t flags) { - unsigned noio_flag = 0; + unsigned nofs_flag = 0; void *ptr; gfp_t lflags; @@ -77,17 +60,17 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) * __vmalloc() will allocate data pages and auxillary structures (e.g. * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context * here. Hence we need to tell memory reclaim that we are in such a - * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering + * context via PF_MEMALLOC_NOFS to prevent memory reclaim re-entering * the filesystem here and potentially deadlocking. */ - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) - noio_flag = memalloc_noio_save(); + if (flags & KM_NOFS) + nofs_flag = memalloc_nofs_save(); lflags = kmem_flags_convert(flags); - ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL); - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) - memalloc_noio_restore(noio_flag); + if (flags & KM_NOFS) + memalloc_nofs_restore(nofs_flag); return ptr; } diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7..4d85992d75b2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -50,10 +50,20 @@ kmem_flags_convert(xfs_km_flags_t flags) lflags = GFP_ATOMIC | __GFP_NOWARN; } else { lflags = GFP_KERNEL | __GFP_NOWARN; - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) + if (flags & KM_NOFS) lflags &= ~__GFP_FS; } + /* + * Default page/slab allocator behavior is to retry for ever + * for small allocations. We can override this behavior by using + * __GFP_RETRY_MAYFAIL which will tell the allocator to retry as long + * as it is feasible but rather fail than retry forever for all + * request sizes. + */ + if (flags & KM_MAYFAIL) + lflags |= __GFP_RETRY_MAYFAIL; + if (flags & KM_ZERO) lflags |= __GFP_ZERO; @@ -69,8 +79,6 @@ static inline void kmem_free(const void *ptr) } -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - static inline void * kmem_zalloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 33db69be4832..b008ff3250eb 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -111,8 +111,7 @@ xfs_ag_resv_critical( /* Critically low if less than 10% or max btree height remains. */ return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS, - pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL, - XFS_RANDOM_AG_RESV_CRITICAL); + pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); } /* diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 9f06a211e157..744dcaec34cc 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -221,20 +221,22 @@ xfs_alloc_get_rec( * Compute aligned version of the found extent. * Takes alignment and min length into account. */ -STATIC void +STATIC bool xfs_alloc_compute_aligned( xfs_alloc_arg_t *args, /* allocation argument structure */ xfs_agblock_t foundbno, /* starting block in found extent */ xfs_extlen_t foundlen, /* length in found extent */ xfs_agblock_t *resbno, /* result block number */ - xfs_extlen_t *reslen) /* result length */ + xfs_extlen_t *reslen, /* result length */ + unsigned *busy_gen) { - xfs_agblock_t bno; - xfs_extlen_t len; + xfs_agblock_t bno = foundbno; + xfs_extlen_t len = foundlen; xfs_extlen_t diff; + bool busy; /* Trim busy sections out of found extent */ - xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); + busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen); /* * If we have a largish extent that happens to start before min_agbno, @@ -259,6 +261,8 @@ xfs_alloc_compute_aligned( *resbno = bno; *reslen = len; } + + return busy; } /* @@ -602,7 +606,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = { /* * Read in the allocation group free block array. */ -STATIC int /* error */ +int /* error */ xfs_alloc_read_agfl( xfs_mount_t *mp, /* mount point structure */ xfs_trans_t *tp, /* transaction pointer */ @@ -737,10 +741,11 @@ xfs_alloc_ag_vextent_exact( int error; xfs_agblock_t fbno; /* start block of found extent */ xfs_extlen_t flen; /* length of found extent */ - xfs_agblock_t tbno; /* start block of trimmed extent */ - xfs_extlen_t tlen; /* length of trimmed extent */ - xfs_agblock_t tend; /* end block of trimmed extent */ + xfs_agblock_t tbno; /* start block of busy extent */ + xfs_extlen_t tlen; /* length of busy extent */ + xfs_agblock_t tend; /* end block of busy extent */ int i; /* success/failure of operation */ + unsigned busy_gen; ASSERT(args->alignment == 1); @@ -773,7 +778,9 @@ xfs_alloc_ag_vextent_exact( /* * Check for overlapping busy extents. */ - xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); + tbno = fbno; + tlen = flen; + xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen); /* * Give up if the start of the extent is busy, or the freespace isn't @@ -853,6 +860,7 @@ xfs_alloc_find_best_extent( xfs_agblock_t sdiff; int error; int i; + unsigned busy_gen; /* The good extent is perfect, no need to search. */ if (!gdiff) @@ -866,7 +874,8 @@ xfs_alloc_find_best_extent( if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); + xfs_alloc_compute_aligned(args, *sbno, *slen, + sbnoa, slena, &busy_gen); /* * The good extent is closer than this one. @@ -955,7 +964,8 @@ xfs_alloc_ag_vextent_near( xfs_extlen_t ltlena; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ - int forced = 0; + bool busy; + unsigned busy_gen; #ifdef DEBUG /* * Randomly don't execute the first algorithm. @@ -982,6 +992,7 @@ restart: ltlen = 0; gtlena = 0; ltlena = 0; + busy = false; /* * Get a cursor for the by-size btree. @@ -1064,8 +1075,8 @@ restart: if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena); + busy = xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena, &busy_gen); if (ltlena < args->minlen) continue; if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) @@ -1183,8 +1194,8 @@ restart: if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena); + busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena, &busy_gen); if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) break; if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) @@ -1199,8 +1210,8 @@ restart: if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, gtbno, gtlen, - >bnoa, >lena); + busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen, + >bnoa, >lena, &busy_gen); if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) break; if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) @@ -1261,9 +1272,9 @@ restart: if (bno_cur_lt == NULL && bno_cur_gt == NULL) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - if (!forced++) { + if (busy) { trace_xfs_alloc_near_busy(args); - xfs_log_force(args->mp, XFS_LOG_SYNC); + xfs_extent_busy_flush(args->mp, args->pag, busy_gen); goto restart; } trace_xfs_alloc_size_neither(args); @@ -1344,7 +1355,8 @@ xfs_alloc_ag_vextent_size( int i; /* temp status variable */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ - int forced = 0; + bool busy; + unsigned busy_gen; restart: /* @@ -1353,6 +1365,7 @@ restart: cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); bno_cur = NULL; + busy = false; /* * Look for an entry >= maxlen+alignment-1 blocks. @@ -1362,14 +1375,13 @@ restart: goto error0; /* - * If none or we have busy extents that we cannot allocate from, then - * we have to settle for a smaller extent. In the case that there are - * no large extents, this will return the last entry in the tree unless - * the tree is empty. In the case that there are only busy large - * extents, this will return the largest small extent unless there + * If none then we have to settle for a smaller extent. In the case that + * there are no large extents, this will return the last entry in the + * tree unless the tree is empty. In the case that there are only busy + * large extents, this will return the largest small extent unless there * are no smaller extents available. */ - if (!i || forced > 1) { + if (!i) { error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, &flen, &i); if (error) @@ -1380,13 +1392,11 @@ restart: return 0; } ASSERT(i == 1); - xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); + busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, + &rlen, &busy_gen); } else { /* * Search for a non-busy extent that is large enough. - * If we are at low space, don't check, or if we fall of - * the end of the btree, turn off the busy check and - * restart. */ for (;;) { error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); @@ -1394,8 +1404,8 @@ restart: goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, fbno, flen, - &rbno, &rlen); + busy = xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen, &busy_gen); if (rlen >= args->maxlen) break; @@ -1407,18 +1417,13 @@ restart: /* * Our only valid extents must have been busy. * Make it unbusy by forcing the log out and - * retrying. If we've been here before, forcing - * the log isn't making the extents available, - * which means they have probably been freed in - * this transaction. In that case, we have to - * give up on them and we'll attempt a minlen - * allocation the next time around. + * retrying. */ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_busy(args); - if (!forced++) - xfs_log_force(args->mp, XFS_LOG_SYNC); + xfs_extent_busy_flush(args->mp, + args->pag, busy_gen); goto restart; } } @@ -1454,8 +1459,8 @@ restart: XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); if (flen < bestrlen) break; - xfs_alloc_compute_aligned(args, fbno, flen, - &rbno, &rlen); + busy = xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen, &busy_gen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), @@ -1484,10 +1489,10 @@ restart: */ args->len = rlen; if (rlen < args->minlen) { - if (!forced++) { + if (busy) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_busy(args); - xfs_log_force(args->mp, XFS_LOG_SYNC); + xfs_extent_busy_flush(args->mp, args->pag, busy_gen); goto restart; } goto out_nominleft; @@ -2449,8 +2454,7 @@ xfs_agf_read_verify( !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) xfs_buf_ioerror(bp, -EFSBADCRC); else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, - XFS_ERRTAG_ALLOC_READ_AGF, - XFS_RANDOM_ALLOC_READ_AGF)) + XFS_ERRTAG_ALLOC_READ_AGF)) xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) @@ -2659,21 +2663,11 @@ xfs_alloc_vextent( args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); args->type = XFS_ALLOCTYPE_NEAR_BNO; /* FALLTHROUGH */ - case XFS_ALLOCTYPE_ANY_AG: - case XFS_ALLOCTYPE_START_AG: case XFS_ALLOCTYPE_FIRST_AG: /* * Rotate through the allocation groups looking for a winner. */ - if (type == XFS_ALLOCTYPE_ANY_AG) { - /* - * Start with the last place we left off. - */ - args->agno = sagno = (mp->m_agfrotor / rotorstep) % - mp->m_sb.sb_agcount; - args->type = XFS_ALLOCTYPE_THIS_AG; - flags = XFS_ALLOC_FLAG_TRYLOCK; - } else if (type == XFS_ALLOCTYPE_FIRST_AG) { + if (type == XFS_ALLOCTYPE_FIRST_AG) { /* * Start with allocation group given by bno. */ @@ -2682,8 +2676,6 @@ xfs_alloc_vextent( sagno = 0; flags = 0; } else { - if (type == XFS_ALLOCTYPE_START_AG) - args->type = XFS_ALLOCTYPE_THIS_AG; /* * Start with the given allocation group. */ @@ -2751,7 +2743,7 @@ xfs_alloc_vextent( } xfs_perag_put(args->pag); } - if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { + if (bump_rotor) { if (args->agno == sagno) mp->m_agfrotor = (mp->m_agfrotor + 1) % (mp->m_sb.sb_agcount * rotorstep); @@ -2849,8 +2841,7 @@ xfs_free_extent( ASSERT(type != XFS_AG_RESV_AGFL); if (XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_FREE_EXTENT, - XFS_RANDOM_FREE_EXTENT)) + XFS_ERRTAG_FREE_EXTENT)) return -EIO; error = xfs_free_extent_fix_freelist(tp, agno, &agbp); @@ -2875,3 +2866,60 @@ err: xfs_trans_brelse(tp, agbp); return error; } + +struct xfs_alloc_query_range_info { + xfs_alloc_query_range_fn fn; + void *priv; +}; + +/* Format btree record and pass to our callback. */ +STATIC int +xfs_alloc_query_range_helper( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_alloc_query_range_info *query = priv; + struct xfs_alloc_rec_incore irec; + + irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); + irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); + return query->fn(cur, &irec, query->priv); +} + +/* Find all free space within a given range of blocks. */ +int +xfs_alloc_query_range( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *low_rec, + struct xfs_alloc_rec_incore *high_rec, + xfs_alloc_query_range_fn fn, + void *priv) +{ + union xfs_btree_irec low_brec; + union xfs_btree_irec high_brec; + struct xfs_alloc_query_range_info query; + + ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); + low_brec.a = *low_rec; + high_brec.a = *high_rec; + query.priv = priv; + query.fn = fn; + return xfs_btree_query_range(cur, &low_brec, &high_brec, + xfs_alloc_query_range_helper, &query); +} + +/* Find all free space records. */ +int +xfs_alloc_query_all( + struct xfs_btree_cur *cur, + xfs_alloc_query_range_fn fn, + void *priv) +{ + struct xfs_alloc_query_range_info query; + + ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); + query.priv = priv; + query.fn = fn; + return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 1d0f48a501a3..ef26edc2e938 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -29,9 +29,7 @@ extern struct workqueue_struct *xfs_alloc_wq; /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ -#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */ #define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */ -#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */ #define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */ #define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */ #define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */ @@ -41,9 +39,7 @@ extern struct workqueue_struct *xfs_alloc_wq; typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_TYPES \ - { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ { XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \ - { XFS_ALLOCTYPE_START_AG, "START_AG" }, \ { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \ { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \ { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \ @@ -217,10 +213,24 @@ xfs_alloc_get_rec( int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); +int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, struct xfs_buf **bpp); int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **agbp); xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp); +typedef int (*xfs_alloc_query_range_fn)( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv); + +int xfs_alloc_query_range(struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *low_rec, + struct xfs_alloc_rec_incore *high_rec, + xfs_alloc_query_range_fn fn, void *priv); +int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn, + void *priv); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index efb467b10a71..cfde0a0f9706 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -205,19 +205,37 @@ xfs_allocbt_init_key_from_rec( union xfs_btree_key *key, union xfs_btree_rec *rec) { - ASSERT(rec->alloc.ar_startblock != 0); - key->alloc.ar_startblock = rec->alloc.ar_startblock; key->alloc.ar_blockcount = rec->alloc.ar_blockcount; } STATIC void +xfs_bnobt_init_high_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + __u32 x; + + x = be32_to_cpu(rec->alloc.ar_startblock); + x += be32_to_cpu(rec->alloc.ar_blockcount) - 1; + key->alloc.ar_startblock = cpu_to_be32(x); + key->alloc.ar_blockcount = 0; +} + +STATIC void +xfs_cntbt_init_high_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->alloc.ar_blockcount = rec->alloc.ar_blockcount; + key->alloc.ar_startblock = 0; +} + +STATIC void xfs_allocbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) { - ASSERT(cur->bc_rec.a.ar_startblock != 0); - rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); } @@ -235,25 +253,58 @@ xfs_allocbt_init_ptr_from_cur( ptr->s = agf->agf_roots[cur->bc_btnum]; } -STATIC __int64_t -xfs_allocbt_key_diff( +STATIC int64_t +xfs_bnobt_key_diff( struct xfs_btree_cur *cur, union xfs_btree_key *key) { xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; xfs_alloc_key_t *kp = &key->alloc; - __int64_t diff; - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return (__int64_t)be32_to_cpu(kp->ar_startblock) - - rec->ar_startblock; - } + return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; +} + +STATIC int64_t +xfs_cntbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; + xfs_alloc_key_t *kp = &key->alloc; + int64_t diff; + + diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; + if (diff) + return diff; + + return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; +} + +STATIC int64_t +xfs_bnobt_diff_two_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) - + be32_to_cpu(k2->alloc.ar_startblock); +} + +STATIC int64_t +xfs_cntbt_diff_two_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + int64_t diff; - diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; + diff = be32_to_cpu(k1->alloc.ar_blockcount) - + be32_to_cpu(k2->alloc.ar_blockcount); if (diff) return diff; - return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; + return be32_to_cpu(k1->alloc.ar_startblock) - + be32_to_cpu(k2->alloc.ar_startblock); } static bool @@ -344,46 +395,54 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = { }; -#if defined(DEBUG) || defined(XFS_WARN) STATIC int -xfs_allocbt_keys_inorder( +xfs_bnobt_keys_inorder( struct xfs_btree_cur *cur, union xfs_btree_key *k1, union xfs_btree_key *k2) { - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return be32_to_cpu(k1->alloc.ar_startblock) < - be32_to_cpu(k2->alloc.ar_startblock); - } else { - return be32_to_cpu(k1->alloc.ar_blockcount) < - be32_to_cpu(k2->alloc.ar_blockcount) || - (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && - be32_to_cpu(k1->alloc.ar_startblock) < - be32_to_cpu(k2->alloc.ar_startblock)); - } + return be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock); } STATIC int -xfs_allocbt_recs_inorder( +xfs_bnobt_recs_inorder( struct xfs_btree_cur *cur, union xfs_btree_rec *r1, union xfs_btree_rec *r2) { - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return be32_to_cpu(r1->alloc.ar_startblock) + - be32_to_cpu(r1->alloc.ar_blockcount) <= - be32_to_cpu(r2->alloc.ar_startblock); - } else { - return be32_to_cpu(r1->alloc.ar_blockcount) < - be32_to_cpu(r2->alloc.ar_blockcount) || - (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && - be32_to_cpu(r1->alloc.ar_startblock) < - be32_to_cpu(r2->alloc.ar_startblock)); - } + return be32_to_cpu(r1->alloc.ar_startblock) + + be32_to_cpu(r1->alloc.ar_blockcount) <= + be32_to_cpu(r2->alloc.ar_startblock); +} + +STATIC int +xfs_cntbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be32_to_cpu(k1->alloc.ar_blockcount) < + be32_to_cpu(k2->alloc.ar_blockcount) || + (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && + be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock)); +} + +STATIC int +xfs_cntbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return be32_to_cpu(r1->alloc.ar_blockcount) < + be32_to_cpu(r2->alloc.ar_blockcount) || + (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && + be32_to_cpu(r1->alloc.ar_startblock) < + be32_to_cpu(r2->alloc.ar_startblock)); } -#endif /* DEBUG */ -static const struct xfs_btree_ops xfs_allocbt_ops = { +static const struct xfs_btree_ops xfs_bnobt_ops = { .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), @@ -395,14 +454,36 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec, .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, - .key_diff = xfs_allocbt_key_diff, + .key_diff = xfs_bnobt_key_diff, .buf_ops = &xfs_allocbt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) - .keys_inorder = xfs_allocbt_keys_inorder, - .recs_inorder = xfs_allocbt_recs_inorder, -#endif + .diff_two_keys = xfs_bnobt_diff_two_keys, + .keys_inorder = xfs_bnobt_keys_inorder, + .recs_inorder = xfs_bnobt_recs_inorder, +}; + +static const struct xfs_btree_ops xfs_cntbt_ops = { + .rec_len = sizeof(xfs_alloc_rec_t), + .key_len = sizeof(xfs_alloc_key_t), + + .dup_cursor = xfs_allocbt_dup_cursor, + .set_root = xfs_allocbt_set_root, + .alloc_block = xfs_allocbt_alloc_block, + .free_block = xfs_allocbt_free_block, + .update_lastrec = xfs_allocbt_update_lastrec, + .get_minrecs = xfs_allocbt_get_minrecs, + .get_maxrecs = xfs_allocbt_get_maxrecs, + .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec, + .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, + .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, + .key_diff = xfs_cntbt_key_diff, + .buf_ops = &xfs_allocbt_buf_ops, + .diff_two_keys = xfs_cntbt_diff_two_keys, + .keys_inorder = xfs_cntbt_keys_inorder, + .recs_inorder = xfs_cntbt_recs_inorder, }; /* @@ -427,16 +508,15 @@ xfs_allocbt_init_cursor( cur->bc_mp = mp; cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_allocbt_ops; - if (btnum == XFS_BTNUM_BNO) - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); - else - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); if (btnum == XFS_BTNUM_CNT) { + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); + cur->bc_ops = &xfs_cntbt_ops; cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; } else { + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); + cur->bc_ops = &xfs_bnobt_ops; cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); } diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6622d46ddec3..de7b9bd30bec 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -114,6 +114,25 @@ xfs_inode_hasattr( * Overall external interface routines. *========================================================================*/ +/* Retrieve an extended attribute and its value. Must have ilock. */ +int +xfs_attr_get_ilocked( + struct xfs_inode *ip, + struct xfs_da_args *args) +{ + ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + + if (!xfs_inode_hasattr(ip)) + return -ENOATTR; + else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) + return xfs_attr_shortform_getvalue(args); + else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) + return xfs_attr_leaf_get(args); + else + return xfs_attr_node_get(args); +} + +/* Retrieve an extended attribute by name, and its value. */ int xfs_attr_get( struct xfs_inode *ip, @@ -141,14 +160,7 @@ xfs_attr_get( args.op_flags = XFS_DA_OP_OKNOENT; lock_mode = xfs_ilock_attr_map_shared(ip); - if (!xfs_inode_hasattr(ip)) - error = -ENOATTR; - else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) - error = xfs_attr_shortform_getvalue(&args); - else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) - error = xfs_attr_leaf_get(&args); - else - error = xfs_attr_node_get(&args); + error = xfs_attr_get_ilocked(ip, &args); xfs_iunlock(ip, lock_mode); *valuelenp = args.valuelen; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 2852521fc8ec..c6c15e5717e4 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -351,7 +351,7 @@ xfs_attr3_leaf_read( err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d52f525f5b2d..5236d8e45146 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -253,7 +253,7 @@ xfs_attr_rmtval_copyout( xfs_ino_t ino, int *offset, int *valuelen, - __uint8_t **dst) + uint8_t **dst) { char *src = bp->b_addr; xfs_daddr_t bno = bp->b_bn; @@ -301,7 +301,7 @@ xfs_attr_rmtval_copyin( xfs_ino_t ino, int *offset, int *valuelen, - __uint8_t **src) + uint8_t **src) { char *dst = bp->b_addr; xfs_daddr_t bno = bp->b_bn; @@ -355,7 +355,7 @@ xfs_attr_rmtval_get( struct xfs_mount *mp = args->dp->i_mount; struct xfs_buf *bp; xfs_dablk_t lblkno = args->rmtblkno; - __uint8_t *dst = args->value; + uint8_t *dst = args->value; int valuelen; int nmap; int error; @@ -386,7 +386,8 @@ xfs_attr_rmtval_get( (map[i].br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + error = xfs_trans_read_buf(mp, args->trans, + mp->m_ddev_targp, dblkno, dblkcnt, 0, &bp, &xfs_attr3_rmt_buf_ops); if (error) @@ -395,7 +396,7 @@ xfs_attr_rmtval_get( error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, &offset, &valuelen, &dst); - xfs_buf_relse(bp); + xfs_trans_brelse(args->trans, bp); if (error) return error; @@ -421,7 +422,7 @@ xfs_attr_rmtval_set( struct xfs_bmbt_irec map; xfs_dablk_t lblkno; xfs_fileoff_t lfileoff = 0; - __uint8_t *src = args->value; + uint8_t *src = args->value; int blkcnt; int valuelen; int nmap; diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index 90928bbe693c..afd684ae3136 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -31,10 +31,10 @@ typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t; * We generate this then sort it, attr_list() must return things in hash-order. */ typedef struct xfs_attr_sf_sort { - __uint8_t entno; /* entry number in original list */ - __uint8_t namelen; /* length of name value (no null) */ - __uint8_t valuelen; /* length of value */ - __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ + uint8_t entno; /* entry number in original list */ + uint8_t namelen; /* length of name value (no null) */ + uint8_t valuelen; /* length of value */ + uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ xfs_dahash_t hash; /* this entry's hash value */ unsigned char *name; /* name value, pointer into buffer */ } xfs_attr_sf_sort_t; @@ -42,7 +42,7 @@ typedef struct xfs_attr_sf_sort { #define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ (((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen))) #define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \ - ((1 << (NBBY*(int)sizeof(__uint8_t))) - 1) + ((1 << (NBBY*(int)sizeof(uint8_t))) - 1) #define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \ ((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen) #define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \ diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h index e1649c0d3e02..61c6b2025d0c 100644 --- a/fs/xfs/libxfs/xfs_bit.h +++ b/fs/xfs/libxfs/xfs_bit.h @@ -25,47 +25,47 @@ /* * masks with n high/low bits set, 64-bit values */ -static inline __uint64_t xfs_mask64hi(int n) +static inline uint64_t xfs_mask64hi(int n) { - return (__uint64_t)-1 << (64 - (n)); + return (uint64_t)-1 << (64 - (n)); } -static inline __uint32_t xfs_mask32lo(int n) +static inline uint32_t xfs_mask32lo(int n) { - return ((__uint32_t)1 << (n)) - 1; + return ((uint32_t)1 << (n)) - 1; } -static inline __uint64_t xfs_mask64lo(int n) +static inline uint64_t xfs_mask64lo(int n) { - return ((__uint64_t)1 << (n)) - 1; + return ((uint64_t)1 << (n)) - 1; } /* Get high bit set out of 32-bit argument, -1 if none set */ -static inline int xfs_highbit32(__uint32_t v) +static inline int xfs_highbit32(uint32_t v) { return fls(v) - 1; } /* Get high bit set out of 64-bit argument, -1 if none set */ -static inline int xfs_highbit64(__uint64_t v) +static inline int xfs_highbit64(uint64_t v) { return fls64(v) - 1; } /* Get low bit set out of 32-bit argument, -1 if none set */ -static inline int xfs_lowbit32(__uint32_t v) +static inline int xfs_lowbit32(uint32_t v) { return ffs(v) - 1; } /* Get low bit set out of 64-bit argument, -1 if none set */ -static inline int xfs_lowbit64(__uint64_t v) +static inline int xfs_lowbit64(uint64_t v) { - __uint32_t w = (__uint32_t)v; + uint32_t w = (uint32_t)v; int n = 0; if (w) { /* lower bits */ n = ffs(w); } else { /* upper bits */ - w = (__uint32_t)(v >> 32); + w = (uint32_t)(v >> 32); if (w) { n = ffs(w); if (n) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index bfc00de5c6f1..0a9880777c9c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -740,15 +740,9 @@ xfs_bmap_extents_to_btree( * Fill in the root. */ block = ifp->if_broot; - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 1, 1, ip->i_ino, + xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, + XFS_BTNUM_BMAP, 1, 1, ip->i_ino, XFS_BTREE_LONG_PTRS); - /* * Need a cursor. Can't allocate until bb_level is filled in. */ @@ -769,7 +763,6 @@ xfs_bmap_extents_to_btree( args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); } else if (dfops->dop_low) { -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = *firstblock; } else { @@ -785,28 +778,16 @@ try_another_ag: return error; } - /* - * During a CoW operation, the allocation and bmbt updates occur in - * different transactions. The mapping code tries to put new bmbt - * blocks near extents being mapped, but the only way to guarantee this - * is if the alloc and the mapping happen in a single transaction that - * has a block reservation. That isn't the case here, so if we run out - * of space we'll try again with another AG. - */ - if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && - args.fsbno == NULLFSBLOCK && - args.type == XFS_ALLOCTYPE_NEAR_BNO) { - dfops->dop_low = true; - goto try_another_ag; + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return -ENOSPC; } /* * Allocation can't fail, the space was reserved. */ - ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(*firstblock == NULLFSBLOCK || - args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || - (dfops->dop_low && - args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); + args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); *firstblock = cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; @@ -817,13 +798,8 @@ try_another_ag: */ abp->b_ops = &xfs_bmbt_buf_ops; ablock = XFS_BUF_TO_BLOCK(abp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + xfs_btree_init_block_int(mp, ablock, abp->b_bn, + XFS_BTNUM_BMAP, 0, 0, ip->i_ino, XFS_BTREE_LONG_PTRS); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); @@ -934,7 +910,6 @@ xfs_bmap_local_to_extents( * file currently fits in an inode. */ if (*firstblock == NULLFSBLOCK) { -try_another_ag: args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); args.type = XFS_ALLOCTYPE_START_BNO; } else { @@ -947,19 +922,6 @@ try_another_ag: if (error) goto done; - /* - * During a CoW operation, the allocation and bmbt updates occur in - * different transactions. The mapping code tries to put new bmbt - * blocks near extents being mapped, but the only way to guarantee this - * is if the alloc and the mapping happen in a single transaction that - * has a block reservation. That isn't the case here, so if we run out - * of space we'll try again with another AG. - */ - if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) && - args.fsbno == NULLFSBLOCK && - args.type == XFS_ALLOCTYPE_NEAR_BNO) { - goto try_another_ag; - } /* Can't fail, the space was reserved. */ ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); @@ -1269,7 +1231,6 @@ xfs_bmap_read_extents( xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ - xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ xfs_extnum_t i, j; /* index into the extents list */ xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ @@ -1278,11 +1239,8 @@ xfs_bmap_read_extents( /* REFERENCED */ xfs_extnum_t room; /* number of entries there's room for */ - bno = NULLFSBLOCK; mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : - XFS_EXTFMT_INODE(ip); block = ifp->if_broot; /* * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. @@ -1291,9 +1249,7 @@ xfs_bmap_read_extents( ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLFSBLOCK); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + /* * Go down the tree until leaf level is reached, following the first * pointer (leftmost) at each level. @@ -1324,7 +1280,6 @@ xfs_bmap_read_extents( xfs_bmbt_rec_t *frp; xfs_fsblock_t nextbno; xfs_extnum_t num_recs; - xfs_extnum_t start; num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { @@ -1347,23 +1302,13 @@ xfs_bmap_read_extents( * Copy records into the extent records. */ frp = XFS_BMBT_REC_ADDR(mp, block, 1); - start = i; for (j = 0; j < num_recs; j++, i++, frp++) { xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); trp->l0 = be64_to_cpu(frp->l0); trp->l1 = be64_to_cpu(frp->l1); - } - if (exntf == XFS_EXTFMT_NOSTATE) { - /* - * Check all attribute bmap btree records and - * any "older" data bmap btree records for a - * set bit in the "extent flag" position. - */ - if (unlikely(xfs_check_nostate_extents(ifp, - start, num_recs))) { + if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) { XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", - XFS_ERRLEVEL_LOW, - ip->i_mount); + XFS_ERRLEVEL_LOW, mp); goto error0; } } @@ -1864,6 +1809,7 @@ xfs_bmap_add_extent_delay_real( */ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); + xfs_bmbt_set_state(ep, new->br_state); trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); (*nextents)++; @@ -2117,8 +2063,10 @@ xfs_bmap_add_extent_delay_real( } temp = xfs_bmap_worst_indlen(bma->ip, temp); temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + diff = (int)(temp + temp2 - + (startblockval(PREV.br_startblock) - + (bma->cur ? + bma->cur->bc_private.b.allocated : 0))); if (diff > 0) { error = xfs_mod_fdblocks(bma->ip->i_mount, -((int64_t)diff), false); @@ -2175,7 +2123,6 @@ xfs_bmap_add_extent_delay_real( temp = da_new; if (bma->cur) temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); if (temp < da_old) xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false); @@ -2202,6 +2149,7 @@ STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( struct xfs_trans *tp, xfs_inode_t *ip, /* incore inode pointer */ + int whichfork, xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ @@ -2221,12 +2169,14 @@ xfs_bmap_add_extent_unwritten_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ - struct xfs_mount *mp = tp->t_mountp; + struct xfs_mount *mp = ip->i_mount; *logflagsp = 0; cur = *curp; - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + ifp = XFS_IFORK_PTR(ip, whichfork); + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; ASSERT(*idx >= 0); ASSERT(*idx <= xfs_iext_count(ifp)); @@ -2285,7 +2235,7 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (*idx < xfs_iext_count(&ip->i_df) - 1) { + if (*idx < xfs_iext_count(ifp) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) @@ -2325,7 +2275,8 @@ xfs_bmap_add_extent_unwritten_real( trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_remove(ip, *idx + 1, 2, state); - ip->i_d.di_nextents -= 2; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 2); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2368,7 +2319,8 @@ xfs_bmap_add_extent_unwritten_real( trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2403,7 +2355,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_bmbt_set_state(ep, newext); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2515,7 +2468,8 @@ xfs_bmap_add_extent_unwritten_real( trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_insert(ip, *idx, 1, new, state); - ip->i_d.di_nextents++; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2593,7 +2547,8 @@ xfs_bmap_add_extent_unwritten_real( ++*idx; xfs_iext_insert(ip, *idx, 1, new, state); - ip->i_d.di_nextents++; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2641,7 +2596,8 @@ xfs_bmap_add_extent_unwritten_real( ++*idx; xfs_iext_insert(ip, *idx, 2, &r[0], state); - ip->i_d.di_nextents += 2; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 2); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2695,17 +2651,17 @@ xfs_bmap_add_extent_unwritten_real( } /* update reverse mappings */ - error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); + error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); if (error) goto done; /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { + if (xfs_bmap_needs_btree(ip, whichfork)) { int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, - 0, &tmp_logflags, XFS_DATA_FORK); + 0, &tmp_logflags, whichfork); *logflagsp |= tmp_logflags; if (error) goto done; @@ -2717,7 +2673,7 @@ xfs_bmap_add_extent_unwritten_real( *curp = cur; } - xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); + xfs_bmap_check_leaf_extents(*curp, ip, whichfork); done: *logflagsp |= rval; return error; @@ -2809,7 +2765,8 @@ xfs_bmap_add_extent_hole_delay( oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock) + startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + oldlen); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); @@ -2830,7 +2787,8 @@ xfs_bmap_add_extent_hole_delay( xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + oldlen); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); @@ -2846,7 +2804,8 @@ xfs_bmap_add_extent_hole_delay( temp = new->br_blockcount + right.br_blockcount; oldlen = startblockval(new->br_startblock) + startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + oldlen); xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), new->br_startoff, nullstartblock((int)newlen), temp, right.br_state); @@ -2878,41 +2837,45 @@ xfs_bmap_add_extent_hole_delay( */ STATIC int /* error */ xfs_bmap_add_extent_hole_real( - struct xfs_bmalloca *bma, - int whichfork) + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + xfs_extnum_t *idx, + struct xfs_btree_cur **curp, + struct xfs_bmbt_irec *new, + xfs_fsblock_t *first, + struct xfs_defer_ops *dfops, + int *logflagsp) { - struct xfs_bmbt_irec *new = &bma->got; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_mount *mp = ip->i_mount; + struct xfs_btree_cur *cur = *curp; int error; /* error return value */ int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_bmbt_irec_t right; /* right neighbor extent entry */ int rval=0; /* return value (logging flags) */ int state; /* state bits, accessed thru macros */ - struct xfs_mount *mp; - mp = bma->ip->i_mount; - ifp = XFS_IFORK_PTR(bma->ip, whichfork); - - ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= xfs_iext_count(ifp)); + ASSERT(*idx >= 0); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!bma->cur || - !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - ASSERT(whichfork != XFS_COW_FORK); + ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); state = 0; if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; /* * Check and set flags if this segment has a left neighbor. */ - if (bma->idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -2921,9 +2884,9 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (bma->idx < xfs_iext_count(ifp)) { + if (*idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -2960,36 +2923,36 @@ xfs_bmap_add_extent_hole_real( * left and on the right. * Merge all three into a single extent record. */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount + right.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); - if (bma->cur == NULL) { + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, + error = xfs_bmbt_lookup_eq(cur, right.br_startoff, right.br_startblock, right.br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_btree_delete(bma->cur, &i); + error = xfs_btree_delete(cur, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_btree_decrement(bma->cur, 0, &i); + error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, + error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, left.br_blockcount + new->br_blockcount + @@ -3006,23 +2969,23 @@ xfs_bmap_add_extent_hole_real( * on the left. * Merge the new allocation with the left neighbor. */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (bma->cur == NULL) { + if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, + error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, left.br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, + error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, left.br_blockcount + new->br_blockcount, @@ -3038,25 +3001,25 @@ xfs_bmap_add_extent_hole_real( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, right.br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (bma->cur == NULL) { + if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, + error = xfs_bmbt_lookup_eq(cur, right.br_startoff, right.br_startblock, right.br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, new->br_startoff, + error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, @@ -3072,22 +3035,22 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. */ - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); - if (bma->cur == NULL) { + xfs_iext_insert(ip, *idx, 1, new, state); + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, + error = xfs_bmbt_lookup_eq(cur, new->br_startoff, new->br_startblock, new->br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - bma->cur->bc_rec.b.br_state = new->br_state; - error = xfs_btree_insert(bma->cur, &i); + cur->bc_rec.b.br_state = new->br_state; + error = xfs_btree_insert(cur, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); @@ -3096,30 +3059,30 @@ xfs_bmap_add_extent_hole_real( } /* add reverse mapping */ - error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new); + error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new); if (error) goto done; /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(bma->ip, whichfork)) { + if (xfs_bmap_needs_btree(ip, whichfork)) { int tmp_logflags; /* partial log flag return val */ - ASSERT(bma->cur == NULL); - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->dfops, &bma->cur, + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp, 0, &tmp_logflags, whichfork); - bma->logflags |= tmp_logflags; + *logflagsp |= tmp_logflags; + cur = *curp; if (error) goto done; } /* clear out the allocated field, done with it now in any case. */ - if (bma->cur) - bma->cur->bc_private.b.allocated = 0; + if (cur) + cur->bc_private.b.allocated = 0; - xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); + xfs_bmap_check_leaf_extents(cur, ip, whichfork); done: - bma->logflags |= rval; + *logflagsp |= rval; return error; } @@ -3822,17 +3785,13 @@ xfs_bmap_btalloc( * the first block that was allocated. */ ASSERT(*ap->firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *ap->firstblock) == - XFS_FSB_TO_AGNO(mp, args.fsbno) || - (ap->dfops->dop_low && - XFS_FSB_TO_AGNO(mp, *ap->firstblock) < - XFS_FSB_TO_AGNO(mp, args.fsbno))); + XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= + XFS_FSB_TO_AGNO(mp, args.fsbno)); ap->blkno = args.fsbno; if (*ap->firstblock == NULLFSBLOCK) *ap->firstblock = args.fsbno; - ASSERT(nullfb || fb_agno == args.agno || - (ap->dfops->dop_low && fb_agno < args.agno)); + ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; if (!(ap->flags & XFS_BMAPI_COWFORK)) ap->ip->i_d.di_nblocks += args.len; @@ -3855,60 +3814,6 @@ xfs_bmap_btalloc( } /* - * For a remap operation, just "allocate" an extent at the address that the - * caller passed in, and ensure that the AGFL is the right size. The caller - * will then map the "allocated" extent into the file somewhere. - */ -STATIC int -xfs_bmap_remap_alloc( - struct xfs_bmalloca *ap) -{ - struct xfs_trans *tp = ap->tp; - struct xfs_mount *mp = tp->t_mountp; - xfs_agblock_t bno; - struct xfs_alloc_arg args; - int error; - - /* - * validate that the block number is legal - the enables us to detect - * and handle a silent filesystem corruption rather than crashing. - */ - memset(&args, 0, sizeof(struct xfs_alloc_arg)); - args.tp = ap->tp; - args.mp = ap->tp->t_mountp; - bno = *ap->firstblock; - args.agno = XFS_FSB_TO_AGNO(mp, bno); - args.agbno = XFS_FSB_TO_AGBNO(mp, bno); - if (args.agno >= mp->m_sb.sb_agcount || - args.agbno >= mp->m_sb.sb_agblocks) - return -EFSCORRUPTED; - - /* "Allocate" the extent from the range we passed in. */ - trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length); - ap->blkno = bno; - ap->ip->i_d.di_nblocks += ap->length; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - - /* Fix the freelist, like a real allocator does. */ - args.datatype = ap->datatype; - args.pag = xfs_perag_get(args.mp, args.agno); - ASSERT(args.pag); - - /* - * The freelist fixing code will decline the allocation if - * the size and shape of the free space doesn't allow for - * allocating the extent and updating all the metadata that - * happens during an allocation. We're remapping, not - * allocating, so skip that check by pretending to be freeing. - */ - error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); - xfs_perag_put(args.pag); - if (error) - trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); - return error; -} - -/* * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. * It figures out where to ask the underlying allocator to put the new extent. */ @@ -3916,8 +3821,6 @@ STATIC int xfs_bmap_alloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - if (ap->flags & XFS_BMAPI_REMAP) - return xfs_bmap_remap_alloc(ap); if (XFS_IS_REALTIME_INODE(ap->ip) && xfs_alloc_is_userdata(ap->datatype)) return xfs_bmap_rtalloc(ap); @@ -4089,7 +3992,7 @@ xfs_bmapi_read( if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + mp, XFS_ERRTAG_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } @@ -4156,6 +4059,19 @@ xfs_bmapi_read( return 0; } +/* + * Add a delayed allocation extent to an inode. Blocks are reserved from the + * global pool and the extent inserted into the inode in-core extent tree. + * + * On entry, got refers to the first extent beyond the offset of the extent to + * allocate or eof is specified if no such extent exists. On return, got refers + * to the extent record that was inserted to the inode fork. + * + * Note that the allocated extent may have been merged with contiguous extents + * during insertion into the inode fork. Thus, got does not reflect the current + * state of the inode fork on return. If necessary, the caller can use lastx to + * look up the updated record in the inode fork. + */ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, @@ -4242,13 +4158,8 @@ xfs_bmapi_reserve_delalloc( got->br_startblock = nullstartblock(indlen); got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); - /* - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay - * might have merged it into one of the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); /* * Tag the inode if blocks were preallocated. Note that COW fork @@ -4260,10 +4171,6 @@ xfs_bmapi_reserve_delalloc( if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) xfs_inode_set_cowblocks_tag(ip); - ASSERT(got->br_startoff <= aoff); - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); - ASSERT(isnullstartblock(got->br_startblock)); - ASSERT(got->br_state == XFS_EXT_NORM); return 0; out_unreserve_blocks: @@ -4368,17 +4275,25 @@ xfs_bmapi_allocate( bma->got.br_state = XFS_EXT_NORM; /* - * A wasdelay extent has been initialized, so shouldn't be flagged - * as unwritten. + * In the data fork, a wasdelay extent has been initialized, so + * shouldn't be flagged as unwritten. + * + * For the cow fork, however, we convert delalloc reservations + * (extents allocated for speculative preallocation) to + * allocated unwritten extents, and only convert the unwritten + * extents to real extents when we're about to write the data. */ - if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && + if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && + (bma->flags & XFS_BMAPI_PREALLOC) && xfs_sb_version_hasextflgbit(&mp->m_sb)) bma->got.br_state = XFS_EXT_UNWRITTEN; if (bma->wasdel) error = xfs_bmap_add_extent_delay_real(bma, whichfork); else - error = xfs_bmap_add_extent_hole_real(bma, whichfork); + error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, + whichfork, &bma->idx, &bma->cur, &bma->got, + bma->firstblock, bma->dfops, &bma->logflags); bma->logflags |= tmp_logflags; if (error) @@ -4422,8 +4337,6 @@ xfs_bmapi_convert_unwritten( (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) return 0; - ASSERT(whichfork != XFS_COW_FORK); - /* * Modify (by adding) the state flag, if writing. */ @@ -4448,8 +4361,8 @@ xfs_bmapi_convert_unwritten( return error; } - error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, - &bma->cur, mval, bma->firstblock, bma->dfops, + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, + &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, &tmp_logflags); /* * Log the inode core unconditionally in the unwritten extent conversion @@ -4458,8 +4371,12 @@ xfs_bmapi_convert_unwritten( * in the transaction for the sake of fsync(), even if nothing has * changed, because fsync() will not force the log for this transaction * unless it sees the inode pinned. + * + * Note: If we're only converting cow fork extents, there aren't + * any on-disk updates to make, so we don't need to log anything. */ - bma->logflags |= tmp_logflags | XFS_ILOG_CORE; + if (whichfork != XFS_COW_FORK) + bma->logflags |= tmp_logflags | XFS_ILOG_CORE; if (error) return error; @@ -4533,15 +4450,13 @@ xfs_bmapi_write( ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(!(flags & XFS_BMAPI_IGSTATE)); - ASSERT(tp != NULL); + ASSERT(tp != NULL || + (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == + (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); ASSERT(len > 0); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); - ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); - ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); - ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK); - ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK); + ASSERT(!(flags & XFS_BMAPI_REMAP)); /* zeroing is for currently only for data extents, not metadata */ ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != @@ -4558,7 +4473,7 @@ xfs_bmapi_write( if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + mp, XFS_ERRTAG_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } @@ -4625,13 +4540,8 @@ xfs_bmapi_write( } else { need_alloc = true; } - } else { - /* - * Make sure we only reflink into a hole. - */ - ASSERT(!(flags & XFS_BMAPI_REMAP)); - if (isnullstartblock(bma.got.br_startblock)) - wasdelay = true; + } else if (isnullstartblock(bma.got.br_startblock)) { + wasdelay = true; } /* @@ -4746,13 +4656,9 @@ error0: if (bma.cur) { if (!error) { ASSERT(*firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *firstblock) == + XFS_FSB_TO_AGNO(mp, *firstblock) <= XFS_FSB_TO_AGNO(mp, - bma.cur->bc_private.b.firstblock) || - (dfops->dop_low && - XFS_FSB_TO_AGNO(mp, *firstblock) < - XFS_FSB_TO_AGNO(mp, - bma.cur->bc_private.b.firstblock))); + bma.cur->bc_private.b.firstblock)); *firstblock = bma.cur->bc_private.b.firstblock; } xfs_btree_del_cursor(bma.cur, @@ -4764,6 +4670,93 @@ error0: return error; } +static int +xfs_bmapi_remap( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_fileoff_t bno, + xfs_filblks_t len, + xfs_fsblock_t startblock, + struct xfs_defer_ops *dfops) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + struct xfs_btree_cur *cur = NULL; + xfs_fsblock_t firstblock = NULLFSBLOCK; + struct xfs_bmbt_irec got; + xfs_extnum_t idx; + int logflags = 0, error; + + ASSERT(len > 0); + ASSERT(len <= (xfs_filblks_t)MAXEXTLEN); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + return error; + } + + if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) { + /* make sure we only reflink into a hole. */ + ASSERT(got.br_startoff > bno); + ASSERT(got.br_startoff - bno >= len); + } + + ip->i_d.di_nblocks += len; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + if (ifp->if_flags & XFS_IFBROOT) { + cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); + cur->bc_private.b.firstblock = firstblock; + cur->bc_private.b.dfops = dfops; + cur->bc_private.b.flags = 0; + } + + got.br_startoff = bno; + got.br_startblock = startblock; + got.br_blockcount = len; + got.br_state = XFS_EXT_NORM; + + error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur, + &got, &firstblock, dfops, &logflags); + if (error) + goto error0; + + if (xfs_bmap_wants_extents(ip, XFS_DATA_FORK)) { + int tmp_logflags = 0; + + error = xfs_bmap_btree_to_extents(tp, ip, cur, + &tmp_logflags, XFS_DATA_FORK); + logflags |= tmp_logflags; + } + +error0: + if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) + logflags &= ~XFS_ILOG_DEXT; + else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) + logflags &= ~XFS_ILOG_DBROOT; + + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + if (cur) { + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + } + return error; +} + /* * When a delalloc extent is split (e.g., due to a hole punch), the original * indlen reservation must be shared across the two new extents that are left @@ -4787,34 +4780,59 @@ xfs_bmap_split_indlen( xfs_filblks_t len2 = *indlen2; xfs_filblks_t nres = len1 + len2; /* new total res. */ xfs_filblks_t stolen = 0; + xfs_filblks_t resfactor; /* * Steal as many blocks as we can to try and satisfy the worst case * indlen for both new extents. */ - while (nres > ores && avail) { - nres--; - avail--; - stolen++; - } + if (ores < nres && avail) + stolen = XFS_FILBLKS_MIN(nres - ores, avail); + ores += stolen; + + /* nothing else to do if we've satisfied the new reservation */ + if (ores >= nres) + return stolen; /* - * The only blocks available are those reserved for the original - * extent and what we can steal from the extent being removed. - * If this still isn't enough to satisfy the combined - * requirements for the two new extents, skim blocks off of each - * of the new reservations until they match what is available. + * We can't meet the total required reservation for the two extents. + * Calculate the percent of the overall shortage between both extents + * and apply this percentage to each of the requested indlen values. + * This distributes the shortage fairly and reduces the chances that one + * of the two extents is left with nothing when extents are repeatedly + * split. */ - while (nres > ores) { - if (len1) { - len1--; - nres--; + resfactor = (ores * 100); + do_div(resfactor, nres); + len1 *= resfactor; + do_div(len1, 100); + len2 *= resfactor; + do_div(len2, 100); + ASSERT(len1 + len2 <= ores); + ASSERT(len1 < *indlen1 && len2 < *indlen2); + + /* + * Hand out the remainder to each extent. If one of the two reservations + * is zero, we want to make sure that one gets a block first. The loop + * below starts with len1, so hand len2 a block right off the bat if it + * is zero. + */ + ores -= (len1 + len2); + ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); + if (ores && !len2 && *indlen2) { + len2++; + ores--; + } + while (ores) { + if (len1 < *indlen1) { + len1++; + ores--; } - if (nres == ores) + if (!ores) break; - if (len2) { - len2--; - nres--; + if (len2 < *indlen2) { + len2++; + ores--; } } @@ -4856,7 +4874,7 @@ xfs_bmap_del_extent_delay( ASSERT(got_endoff >= del_endoff); if (isrt) { - int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); + uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_frextents(mp, rtexts); @@ -5416,6 +5434,7 @@ __xfs_bunmapi( int whichfork; /* data or attribute fork */ xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ + xfs_fileoff_t max_len; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5437,6 +5456,16 @@ __xfs_bunmapi( ASSERT(len > 0); ASSERT(nexts >= 0); + /* + * Guesstimate how many blocks we can unmap without running the risk of + * blowing out the transaction with a mix of EFIs and reflink + * adjustments. + */ + if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); + else + max_len = len; + if (!(ifp->if_flags & XFS_IFEXTENTS) && (error = xfs_iread_extents(tp, ip, whichfork))) return error; @@ -5481,7 +5510,7 @@ __xfs_bunmapi( extno = 0; while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && - (nexts == 0 || extno < nexts)) { + (nexts == 0 || extno < nexts) && max_len > 0) { /* * Is the found extent after a hole in which bno lives? * Just back up to the previous extent, if so. @@ -5513,6 +5542,15 @@ __xfs_bunmapi( } if (del.br_startoff + del.br_blockcount > bno + 1) del.br_blockcount = bno + 1 - del.br_startoff; + + /* How much can we safely unmap? */ + if (max_len < del.br_blockcount) { + del.br_startoff += del.br_blockcount - max_len; + if (!wasdel) + del.br_startblock += del.br_blockcount - max_len; + del.br_blockcount = max_len; + } + sum = del.br_startblock + del.br_blockcount; if (isrt && (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { @@ -5556,8 +5594,8 @@ __xfs_bunmapi( } del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, ip, - &lastx, &cur, &del, firstblock, dfops, - &logflags); + whichfork, &lastx, &cur, &del, + firstblock, dfops, &logflags); if (error) goto error0; goto nodelete; @@ -5610,8 +5648,9 @@ __xfs_bunmapi( prev.br_state = XFS_EXT_UNWRITTEN; lastx--; error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &prev, - firstblock, dfops, &logflags); + ip, whichfork, &lastx, &cur, + &prev, firstblock, dfops, + &logflags); if (error) goto error0; goto nodelete; @@ -5619,8 +5658,9 @@ __xfs_bunmapi( ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &del, - firstblock, dfops, &logflags); + ip, whichfork, &lastx, &cur, + &del, firstblock, dfops, + &logflags); if (error) goto error0; goto nodelete; @@ -5687,6 +5727,7 @@ __xfs_bunmapi( if (!isrt && wasdel) xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false); + max_len -= del.br_blockcount; bno = del.br_startoff - 1; nodelete: /* @@ -6057,7 +6098,7 @@ xfs_bmap_shift_extents( if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + mp, XFS_ERRTAG_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmap_shift_extents", XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; @@ -6209,7 +6250,7 @@ xfs_bmap_split_extent_at( if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + mp, XFS_ERRTAG_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmap_split_extent_at", XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; @@ -6452,49 +6493,33 @@ xfs_bmap_finish_one( int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, + xfs_filblks_t *blockcount, xfs_exntst_t state) { - struct xfs_bmbt_irec bmap; - int nimaps = 1; xfs_fsblock_t firstfsb; - int flags = XFS_BMAPI_REMAP; - int done; int error = 0; - bmap.br_startblock = startblock; - bmap.br_startoff = startoff; - bmap.br_blockcount = blockcount; - bmap.br_state = state; - trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), - ip->i_ino, whichfork, startoff, blockcount, state); + ip->i_ino, whichfork, startoff, *blockcount, state); - if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) + if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK)) return -EFSCORRUPTED; - if (whichfork == XFS_ATTR_FORK) - flags |= XFS_BMAPI_ATTRFORK; if (XFS_TEST_ERROR(false, tp->t_mountp, - XFS_ERRTAG_BMAP_FINISH_ONE, - XFS_RANDOM_BMAP_FINISH_ONE)) + XFS_ERRTAG_BMAP_FINISH_ONE)) return -EIO; switch (type) { case XFS_BMAP_MAP: - firstfsb = bmap.br_startblock; - error = xfs_bmapi_write(tp, ip, bmap.br_startoff, - bmap.br_blockcount, flags, &firstfsb, - bmap.br_blockcount, &bmap, &nimaps, - dfops); + error = xfs_bmapi_remap(tp, ip, startoff, *blockcount, + startblock, dfops); + *blockcount = 0; break; case XFS_BMAP_UNMAP: - error = xfs_bunmapi(tp, ip, bmap.br_startoff, - bmap.br_blockcount, flags, 1, &firstfsb, - dfops, &done); - ASSERT(done); + error = __xfs_bunmapi(tp, ip, startoff, blockcount, + XFS_BMAPI_REMAP, 1, &firstfsb, dfops); break; default: ASSERT(0); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index cdef87db5262..851982a5dfbc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -172,6 +172,18 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags) /* + * Return true if the extent is a real, allocated extent, or false if it is a + * delayed allocation, and unwritten extent or a hole. + */ +static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) +{ + return irec->br_state != XFS_EXT_UNWRITTEN && + irec->br_startblock != HOLESTARTBLOCK && + irec->br_startblock != DELAYSTARTBLOCK && + !isnullstartblock(irec->br_startblock); +} + +/* * This macro is used to determine how many extents will be shifted * in one write transaction. We could require two splits, * an extent move on the first and an extent merge on the second, @@ -232,8 +244,6 @@ int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *del); void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); -int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, - xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, @@ -261,7 +271,7 @@ struct xfs_bmap_intent { int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops, struct xfs_inode *ip, enum xfs_bmap_intent_type type, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, xfs_exntst_t state); + xfs_filblks_t *blockcount, xfs_exntst_t state); int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index d9be241fc86f..85de22513014 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -71,15 +71,9 @@ xfs_bmdr_to_bmbt( xfs_bmbt_key_t *tkp; __be64 *tpp; - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, + XFS_BTNUM_BMAP, 0, 0, ip->i_ino, XFS_BTREE_LONG_PTRS); - rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; @@ -100,8 +94,8 @@ xfs_bmdr_to_bmbt( */ STATIC void __xfs_bmbt_get_all( - __uint64_t l0, - __uint64_t l1, + uint64_t l0, + uint64_t l1, xfs_bmbt_irec_t *s) { int ext_flag; @@ -372,32 +366,6 @@ xfs_bmbt_to_bmdr( memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } -/* - * Check extent records, which have just been read, for - * any bit in the extent flag field. ASSERT on debug - * kernels, as this condition should not occur. - * Return an error condition (1) if any flags found, - * otherwise return 0. - */ - -int -xfs_check_nostate_extents( - xfs_ifork_t *ifp, - xfs_extnum_t idx, - xfs_extnum_t num) -{ - for (; num > 0; num--, idx++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); - if ((ep->l0 >> - (64 - BMBT_EXNTFLAG_BITLEN)) != 0) { - ASSERT(0); - return 1; - } - } - return 0; -} - - STATIC struct xfs_btree_cur * xfs_bmbt_dup_cursor( struct xfs_btree_cur *cur) @@ -453,7 +421,6 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; /* * Make sure there is sufficient room left in the AG to @@ -483,22 +450,6 @@ try_another_ag: if (error) goto error0; - /* - * During a CoW operation, the allocation and bmbt updates occur in - * different transactions. The mapping code tries to put new bmbt - * blocks near extents being mapped, but the only way to guarantee this - * is if the alloc and the mapping happen in a single transaction that - * has a block reservation. That isn't the case here, so if we run out - * of space we'll try again with another AG. - */ - if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && - args.fsbno == NULLFSBLOCK && - args.type == XFS_ALLOCTYPE_NEAR_BNO) { - cur->bc_private.b.dfops->dop_low = true; - args.fsbno = cur->bc_private.b.firstblock; - goto try_another_ag; - } - if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy @@ -512,7 +463,7 @@ try_another_ag: goto error0; cur->bc_private.b.dfops->dop_low = true; } - if (args.fsbno == NULLFSBLOCK) { + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; @@ -622,6 +573,16 @@ xfs_bmbt_init_key_from_rec( } STATIC void +xfs_bmbt_init_high_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->bmbt.br_startoff = cpu_to_be64( + xfs_bmbt_disk_get_startoff(&rec->bmbt) + + xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1); +} + +STATIC void xfs_bmbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) @@ -637,15 +598,25 @@ xfs_bmbt_init_ptr_from_cur( ptr->l = 0; } -STATIC __int64_t +STATIC int64_t xfs_bmbt_key_diff( struct xfs_btree_cur *cur, union xfs_btree_key *key) { - return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) - + return (int64_t)be64_to_cpu(key->bmbt.br_startoff) - cur->bc_rec.b.br_startoff; } +STATIC int64_t +xfs_bmbt_diff_two_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) - + be64_to_cpu(k2->bmbt.br_startoff); +} + static bool xfs_bmbt_verify( struct xfs_buf *bp) @@ -736,7 +707,6 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = { }; -#if defined(DEBUG) || defined(XFS_WARN) STATIC int xfs_bmbt_keys_inorder( struct xfs_btree_cur *cur, @@ -757,7 +727,6 @@ xfs_bmbt_recs_inorder( xfs_bmbt_disk_get_blockcount(&r1->bmbt) <= xfs_bmbt_disk_get_startoff(&r2->bmbt); } -#endif /* DEBUG */ static const struct xfs_btree_ops xfs_bmbt_ops = { .rec_len = sizeof(xfs_bmbt_rec_t), @@ -771,14 +740,14 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .get_minrecs = xfs_bmbt_get_minrecs, .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, + .init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec, .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, + .diff_two_keys = xfs_bmbt_diff_two_keys, .buf_ops = &xfs_bmbt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) .keys_inorder = xfs_bmbt_keys_inorder, .recs_inorder = xfs_bmbt_recs_inorder, -#endif }; /* diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 819a8a4dee95..9da5a8d4f184 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -25,14 +25,6 @@ struct xfs_inode; struct xfs_trans; /* - * Extent state and extent format macros. - */ -#define XFS_EXTFMT_INODE(x) \ - (xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \ - XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE) -#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN) - -/* * Btree block header size depends on a superblock flag. */ #define XFS_BMBT_BLOCK_LEN(mp) \ @@ -140,4 +132,18 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); +/* + * Check that the extent does not contain an invalid unwritten extent flag. + */ +static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork, + struct xfs_bmbt_rec_host *ep) +{ + if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0) + return true; + if (whichfork == XFS_DATA_FORK && + xfs_sb_version_hasextflgbit(&mp->m_sb)) + return true; + return false; +} + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 21e6a6ab6b9a..4da85fff69ad 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -43,15 +43,25 @@ kmem_zone_t *xfs_btree_cur_zone; /* * Btree magic numbers. */ -static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { +static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, XFS_FIBT_MAGIC, 0 }, { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, XFS_REFC_CRC_MAGIC } }; -#define xfs_btree_magic(cur) \ - xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] + +uint32_t +xfs_btree_magic( + int crc, + xfs_btnum_t btnum) +{ + uint32_t magic = xfs_magics[crc][btnum]; + + /* Ensure we asked for crc for crc-only magics. */ + ASSERT(magic != 0); + return magic; +} STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lblock( @@ -62,10 +72,13 @@ xfs_btree_check_lblock( { int lblock_ok = 1; /* block passes checks */ struct xfs_mount *mp; /* file system mount point */ + xfs_btnum_t btnum = cur->bc_btnum; + int crc; mp = cur->bc_mp; + crc = xfs_sb_version_hascrc(&mp->m_sb); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (crc) { lblock_ok = lblock_ok && uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid) && @@ -74,7 +87,7 @@ xfs_btree_check_lblock( } lblock_ok = lblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && @@ -88,8 +101,7 @@ xfs_btree_check_lblock( be64_to_cpu(block->bb_u.l.bb_rightsib))); if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, - XFS_ERRTAG_BTREE_CHECK_LBLOCK, - XFS_RANDOM_BTREE_CHECK_LBLOCK))) { + XFS_ERRTAG_BTREE_CHECK_LBLOCK))) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); @@ -110,13 +122,16 @@ xfs_btree_check_sblock( struct xfs_agf *agf; /* ag. freespace structure */ xfs_agblock_t agflen; /* native ag. freespace length */ int sblock_ok = 1; /* block passes checks */ + xfs_btnum_t btnum = cur->bc_btnum; + int crc; mp = cur->bc_mp; + crc = xfs_sb_version_hascrc(&mp->m_sb); agbp = cur->bc_private.a.agbp; agf = XFS_BUF_TO_AGF(agbp); agflen = be32_to_cpu(agf->agf_length); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (crc) { sblock_ok = sblock_ok && uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid) && @@ -125,7 +140,7 @@ xfs_btree_check_sblock( } sblock_ok = sblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && @@ -137,8 +152,7 @@ xfs_btree_check_sblock( block->bb_u.s.bb_rightsib; if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, - XFS_ERRTAG_BTREE_CHECK_SBLOCK, - XFS_RANDOM_BTREE_CHECK_SBLOCK))) { + XFS_ERRTAG_BTREE_CHECK_SBLOCK))) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); @@ -552,7 +566,7 @@ xfs_btree_ptr_offset( /* * Return a pointer to the n-th record in the btree block. */ -STATIC union xfs_btree_rec * +union xfs_btree_rec * xfs_btree_rec_addr( struct xfs_btree_cur *cur, int n, @@ -565,7 +579,7 @@ xfs_btree_rec_addr( /* * Return a pointer to the n-th key in the btree block. */ -STATIC union xfs_btree_key * +union xfs_btree_key * xfs_btree_key_addr( struct xfs_btree_cur *cur, int n, @@ -578,7 +592,7 @@ xfs_btree_key_addr( /* * Return a pointer to the n-th high key in the btree block. */ -STATIC union xfs_btree_key * +union xfs_btree_key * xfs_btree_high_key_addr( struct xfs_btree_cur *cur, int n, @@ -591,7 +605,7 @@ xfs_btree_high_key_addr( /* * Return a pointer to the n-th block pointer in the btree block. */ -STATIC union xfs_btree_ptr * +union xfs_btree_ptr * xfs_btree_ptr_addr( struct xfs_btree_cur *cur, int n, @@ -625,7 +639,7 @@ xfs_btree_get_iroot( * Retrieve the block pointer from the cursor at the given level. * This may be an inode btree root or from a buffer. */ -STATIC struct xfs_btree_block * /* generic btree block pointer */ +struct xfs_btree_block * /* generic btree block pointer */ xfs_btree_get_block( struct xfs_btree_cur *cur, /* btree cursor */ int level, /* level in btree */ @@ -762,14 +776,14 @@ xfs_btree_lastrec( */ void xfs_btree_offsets( - __int64_t fields, /* bitmask of fields */ + int64_t fields, /* bitmask of fields */ const short *offsets, /* table of field offsets */ int nbits, /* number of bits to inspect */ int *first, /* output: first byte offset */ int *last) /* output: last byte offset */ { int i; /* current bit number */ - __int64_t imask; /* mask for current bit number */ + int64_t imask; /* mask for current bit number */ ASSERT(fields != 0); /* @@ -810,7 +824,8 @@ xfs_btree_read_bufl( xfs_daddr_t d; /* real disk block address */ int error; - ASSERT(fsbno != NULLFSBLOCK); + if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) + return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, mp->m_bsize, lock, &bp, ops); @@ -1084,12 +1099,15 @@ xfs_btree_init_block_int( struct xfs_mount *mp, struct xfs_btree_block *buf, xfs_daddr_t blkno, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, unsigned int flags) { + int crc = xfs_sb_version_hascrc(&mp->m_sb); + __u32 magic = xfs_btree_magic(crc, btnum); + buf->bb_magic = cpu_to_be32(magic); buf->bb_level = cpu_to_be16(level); buf->bb_numrecs = cpu_to_be16(numrecs); @@ -1097,7 +1115,7 @@ xfs_btree_init_block_int( if (flags & XFS_BTREE_LONG_PTRS) { buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); - if (flags & XFS_BTREE_CRC_BLOCKS) { + if (crc) { buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); buf->bb_u.l.bb_owner = cpu_to_be64(owner); uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); @@ -1110,7 +1128,7 @@ xfs_btree_init_block_int( buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); - if (flags & XFS_BTREE_CRC_BLOCKS) { + if (crc) { buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); buf->bb_u.s.bb_owner = cpu_to_be32(__owner); uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); @@ -1123,14 +1141,14 @@ void xfs_btree_init_block( struct xfs_mount *mp, struct xfs_buf *bp, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, unsigned int flags) { xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - magic, level, numrecs, owner, flags); + btnum, level, numrecs, owner, flags); } STATIC void @@ -1140,7 +1158,7 @@ xfs_btree_init_block_cur( int level, int numrecs) { - __u64 owner; + __u64 owner; /* * we can pull the owner from the cursor right now as the different @@ -1154,7 +1172,7 @@ xfs_btree_init_block_cur( owner = cur->bc_private.a.agno; xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - xfs_btree_magic(cur), level, numrecs, + cur->bc_btnum, level, numrecs, owner, cur->bc_flags); } @@ -1736,7 +1754,7 @@ error0: return error; } -STATIC int +int xfs_btree_lookup_get_block( struct xfs_btree_cur *cur, /* btree cursor */ int level, /* level in the btree */ @@ -1826,7 +1844,7 @@ xfs_btree_lookup( int *stat) /* success/failure */ { struct xfs_btree_block *block; /* current btree block */ - __int64_t diff; /* difference for the current key */ + int64_t diff; /* difference for the current key */ int error; /* error return value */ int keyno; /* current key number */ int level; /* level in the btree */ @@ -2866,7 +2884,7 @@ xfs_btree_split_worker( struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args, work); unsigned long pflags; - unsigned long new_pflags = PF_FSTRANS; + unsigned long new_pflags = PF_MEMALLOC_NOFS; /* * we are in a transaction context here, but may also be doing work @@ -4375,7 +4393,7 @@ xfs_btree_visit_blocks( xfs_btree_readahead_ptr(cur, ptr, 1); /* save for the next iteration of the loop */ - lptr = *ptr; + xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); } /* for each buffer in the level */ @@ -4415,7 +4433,7 @@ xfs_btree_visit_blocks( * recovery completion writes the changes to disk. */ struct xfs_btree_block_change_owner_info { - __uint64_t new_owner; + uint64_t new_owner; struct list_head *buffer_list; }; @@ -4461,7 +4479,7 @@ xfs_btree_block_change_owner( int xfs_btree_change_owner( struct xfs_btree_cur *cur, - __uint64_t new_owner, + uint64_t new_owner, struct list_head *buffer_list) { struct xfs_btree_block_change_owner_info bbcoi; @@ -4565,7 +4583,7 @@ xfs_btree_simple_query_range( { union xfs_btree_rec *recp; union xfs_btree_key rec_key; - __int64_t diff; + int64_t diff; int stat; bool firstrec = true; int error; @@ -4662,8 +4680,8 @@ xfs_btree_overlapped_query_range( union xfs_btree_key *hkp; union xfs_btree_rec *recp; struct xfs_btree_block *block; - __int64_t ldiff; - __int64_t hdiff; + int64_t ldiff; + int64_t hdiff; int level; struct xfs_buf *bp; int i; @@ -4822,6 +4840,23 @@ xfs_btree_query_range( fn, priv); } +/* Query a btree for all records. */ +int +xfs_btree_query_all( + struct xfs_btree_cur *cur, + xfs_btree_query_range_fn fn, + void *priv) +{ + union xfs_btree_key low_key; + union xfs_btree_key high_key; + + memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); + memset(&low_key, 0, sizeof(low_key)); + memset(&high_key, 0xFF, sizeof(high_key)); + + return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); +} + /* * Calculate the number of blocks needed to store a given number of records * in a short-format (per-AG metadata) btree. diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index b69b947c4c1b..9c95e965cfe5 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -76,6 +76,8 @@ union xfs_btree_rec { #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) #define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) +uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); + /* * For logging record fields. */ @@ -148,20 +150,19 @@ struct xfs_btree_ops { union xfs_btree_rec *rec); /* difference between key value and cursor value */ - __int64_t (*key_diff)(struct xfs_btree_cur *cur, + int64_t (*key_diff)(struct xfs_btree_cur *cur, union xfs_btree_key *key); /* * Difference between key2 and key1 -- positive if key1 > key2, * negative if key1 < key2, and zero if equal. */ - __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur, + int64_t (*diff_two_keys)(struct xfs_btree_cur *cur, union xfs_btree_key *key1, union xfs_btree_key *key2); const struct xfs_buf_ops *buf_ops; -#if defined(DEBUG) || defined(XFS_WARN) /* check that k1 is lower than k2 */ int (*keys_inorder)(struct xfs_btree_cur *cur, union xfs_btree_key *k1, @@ -171,7 +172,6 @@ struct xfs_btree_ops { int (*recs_inorder)(struct xfs_btree_cur *cur, union xfs_btree_rec *r1, union xfs_btree_rec *r2); -#endif }; /* @@ -211,11 +211,11 @@ typedef struct xfs_btree_cur union xfs_btree_irec bc_rec; /* current insert/search record value */ struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ - __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ + uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ #define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */ #define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */ - __uint8_t bc_nlevels; /* number of levels in the tree */ - __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ + uint8_t bc_nlevels; /* number of levels in the tree */ + uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ xfs_btnum_t bc_btnum; /* identifies which btree type */ int bc_statoff; /* offset of btre stats array */ union { @@ -328,7 +328,7 @@ xfs_btree_islastblock( */ void xfs_btree_offsets( - __int64_t fields, /* bitmask of fields */ + int64_t fields, /* bitmask of fields */ const short *offsets,/* table of field offsets */ int nbits, /* number of bits to inspect */ int *first, /* output: first byte offset */ @@ -378,7 +378,7 @@ void xfs_btree_init_block( struct xfs_mount *mp, struct xfs_buf *bp, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, @@ -389,7 +389,7 @@ xfs_btree_init_block_int( struct xfs_mount *mp, struct xfs_btree_block *buf, xfs_daddr_t blkno, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, @@ -406,7 +406,7 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_insert(struct xfs_btree_cur *, int *); int xfs_btree_delete(struct xfs_btree_cur *, int *); int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); -int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner, +int xfs_btree_change_owner(struct xfs_btree_cur *cur, uint64_t new_owner, struct list_head *buffer_list); /* @@ -432,7 +432,7 @@ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block) } static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block, - __uint16_t numrecs) + uint16_t numrecs) { block->bb_numrecs = cpu_to_be16(numrecs); } @@ -456,7 +456,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) #define XFS_FSB_SANITY_CHECK(mp,fsb) \ - (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ + (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) /* @@ -494,6 +494,8 @@ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, int xfs_btree_query_range(struct xfs_btree_cur *cur, union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec, xfs_btree_query_range_fn fn, void *priv); +int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn, + void *priv); typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, void *data); @@ -502,4 +504,17 @@ int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks); +union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n, + struct xfs_btree_block *block); +union xfs_btree_key *xfs_btree_key_addr(struct xfs_btree_cur *cur, int n, + struct xfs_btree_block *block); +union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n, + struct xfs_btree_block *block); +union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n, + struct xfs_btree_block *block); +int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level, + union xfs_btree_ptr *pp, struct xfs_btree_block **blkp); +struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur, + int level, struct xfs_buf **bpp); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h index a416c7cb23ea..8211f48b98e6 100644 --- a/fs/xfs/libxfs/xfs_cksum.h +++ b/fs/xfs/libxfs/xfs_cksum.h @@ -1,7 +1,7 @@ #ifndef _XFS_CKSUM_H #define _XFS_CKSUM_H 1 -#define XFS_CRC_SEED (~(__uint32_t)0) +#define XFS_CRC_SEED (~(uint32_t)0) /* * Calculate the intermediate checksum for a buffer that has the CRC field @@ -9,11 +9,11 @@ * cksum_offset parameter. We do not modify the buffer during verification, * hence we have to split the CRC calculation across the cksum_offset. */ -static inline __uint32_t +static inline uint32_t xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset) { - __uint32_t zero = 0; - __uint32_t crc; + uint32_t zero = 0; + uint32_t crc; /* Calculate CRC up to the checksum. */ crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset); @@ -30,7 +30,7 @@ xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset) * Fast CRC method where the buffer is modified. Callers must have exclusive * access to the buffer while the calculation takes place. */ -static inline __uint32_t +static inline uint32_t xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset) { /* zero the CRC field */ @@ -48,7 +48,7 @@ xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset) * so that it is consistent on disk. */ static inline __le32 -xfs_end_cksum(__uint32_t crc) +xfs_end_cksum(uint32_t crc) { return ~cpu_to_le32(crc); } @@ -62,7 +62,7 @@ xfs_end_cksum(__uint32_t crc) static inline void xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset) { - __uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset); + uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset); *(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc); } @@ -73,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset) static inline int xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset) { - __uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset); + uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset); return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc); } diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index f2dc1a950c85..6d4335815c3f 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -263,7 +263,7 @@ xfs_da3_node_read( err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, which_fork, &xfs_da3_node_buf_ops); - if (!err && tp) { + if (!err && tp && *bpp) { struct xfs_da_blkinfo *info = (*bpp)->b_addr; int type; @@ -1282,7 +1282,7 @@ xfs_da3_fixhashpath( return; break; case XFS_DIR2_LEAFN_MAGIC: - lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count); + lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count); if (count == 0) return; break; @@ -1502,8 +1502,8 @@ xfs_da3_node_lookup_int( if (blk->magic == XFS_DIR2_LEAFN_MAGIC || blk->magic == XFS_DIR3_LEAFN_MAGIC) { blk->magic = XFS_DIR2_LEAFN_MAGIC; - blk->hashval = xfs_dir2_leafn_lasthash(args->dp, - blk->bp, NULL); + blk->hashval = xfs_dir2_leaf_lasthash(args->dp, + blk->bp, NULL); break; } @@ -1929,8 +1929,8 @@ xfs_da3_path_shift( blk->magic = XFS_DIR2_LEAFN_MAGIC; ASSERT(level == path->active-1); blk->index = 0; - blk->hashval = xfs_dir2_leafn_lasthash(args->dp, - blk->bp, NULL); + blk->hashval = xfs_dir2_leaf_lasthash(args->dp, + blk->bp, NULL); break; default: ASSERT(0); @@ -1952,7 +1952,7 @@ xfs_da3_path_shift( * This is implemented with some source-level loop unrolling. */ xfs_dahash_t -xfs_da_hashname(const __uint8_t *name, int namelen) +xfs_da_hashname(const uint8_t *name, int namelen) { xfs_dahash_t hash; @@ -2633,7 +2633,7 @@ out_free: /* * Readahead the dir/attr block. */ -xfs_daddr_t +int xfs_da_reada_buf( struct xfs_inode *dp, xfs_dablk_t bno, @@ -2664,7 +2664,5 @@ out_free: if (mapp != &map) kmem_free(mapp); - if (error) - return -1; - return mappedbno; + return error; } diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 98c75cbe6ac2..ae6de17467f2 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -60,10 +60,10 @@ enum xfs_dacmp { */ typedef struct xfs_da_args { struct xfs_da_geometry *geo; /* da block geometry */ - const __uint8_t *name; /* string (maybe not NULL terminated) */ + const uint8_t *name; /* string (maybe not NULL terminated) */ int namelen; /* length of string (maybe no NULL) */ - __uint8_t filetype; /* filetype of inode for directories */ - __uint8_t *value; /* set of bytes (maybe contain NULLs) */ + uint8_t filetype; /* filetype of inode for directories */ + uint8_t *value; /* set of bytes (maybe contain NULLs) */ int valuelen; /* length of value */ int flags; /* argument flags (eg: ATTR_NOCREATE) */ xfs_dahash_t hashval; /* hash value of name */ @@ -201,13 +201,13 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp, int whichfork, const struct xfs_buf_ops *ops); -xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, +int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, int whichfork, const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, struct xfs_buf *dead_buf); -uint xfs_da_hashname(const __uint8_t *name_string, int name_length); +uint xfs_da_hashname(const uint8_t *name_string, int name_length); enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, const unsigned char *name, int len); diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index f1e8d4dbb600..6d77d1a8498a 100644 --- a/fs/xfs/libxfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c @@ -49,7 +49,7 @@ xfs_dir3_sf_entsize( struct xfs_dir2_sf_hdr *hdr, int len) { - return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t); + return xfs_dir2_sf_entsize(hdr, len) + sizeof(uint8_t); } static struct xfs_dir2_sf_entry * @@ -77,7 +77,7 @@ xfs_dir3_sf_nextentry( * not necessary. For non-filetype enable directories, the type is always * unknown and we never store the value. */ -static __uint8_t +static uint8_t xfs_dir2_sfe_get_ftype( struct xfs_dir2_sf_entry *sfep) { @@ -87,16 +87,16 @@ xfs_dir2_sfe_get_ftype( static void xfs_dir2_sfe_put_ftype( struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype) + uint8_t ftype) { ASSERT(ftype < XFS_DIR3_FT_MAX); } -static __uint8_t +static uint8_t xfs_dir3_sfe_get_ftype( struct xfs_dir2_sf_entry *sfep) { - __uint8_t ftype; + uint8_t ftype; ftype = sfep->name[sfep->namelen]; if (ftype >= XFS_DIR3_FT_MAX) @@ -107,7 +107,7 @@ xfs_dir3_sfe_get_ftype( static void xfs_dir3_sfe_put_ftype( struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype) + uint8_t ftype) { ASSERT(ftype < XFS_DIR3_FT_MAX); @@ -124,7 +124,7 @@ xfs_dir3_sfe_put_ftype( static xfs_ino_t xfs_dir2_sf_get_ino( struct xfs_dir2_sf_hdr *hdr, - __uint8_t *from) + uint8_t *from) { if (hdr->i8count) return get_unaligned_be64(from) & 0x00ffffffffffffffULL; @@ -135,7 +135,7 @@ xfs_dir2_sf_get_ino( static void xfs_dir2_sf_put_ino( struct xfs_dir2_sf_hdr *hdr, - __uint8_t *to, + uint8_t *to, xfs_ino_t ino) { ASSERT((ino & 0xff00000000000000ULL) == 0); @@ -225,7 +225,7 @@ xfs_dir3_sfe_put_ino( #define XFS_DIR3_DATA_ENTSIZE(n) \ round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ - sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \ + sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)), \ XFS_DIR2_DATA_ALIGN) static int @@ -242,7 +242,7 @@ xfs_dir3_data_entsize( return XFS_DIR3_DATA_ENTSIZE(n); } -static __uint8_t +static uint8_t xfs_dir2_data_get_ftype( struct xfs_dir2_data_entry *dep) { @@ -252,16 +252,16 @@ xfs_dir2_data_get_ftype( static void xfs_dir2_data_put_ftype( struct xfs_dir2_data_entry *dep, - __uint8_t ftype) + uint8_t ftype) { ASSERT(ftype < XFS_DIR3_FT_MAX); } -static __uint8_t +static uint8_t xfs_dir3_data_get_ftype( struct xfs_dir2_data_entry *dep) { - __uint8_t ftype = dep->name[dep->namelen]; + uint8_t ftype = dep->name[dep->namelen]; if (ftype >= XFS_DIR3_FT_MAX) return XFS_DIR3_FT_UNKNOWN; @@ -271,7 +271,7 @@ xfs_dir3_data_get_ftype( static void xfs_dir3_data_put_ftype( struct xfs_dir2_data_entry *dep, - __uint8_t type) + uint8_t type) { ASSERT(type < XFS_DIR3_FT_MAX); ASSERT(dep->namelen != 0); diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 9a492a9e19bd..3771edcb301d 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -111,11 +111,11 @@ struct xfs_da3_intnode { * appropriate. */ struct xfs_da3_icnode_hdr { - __uint32_t forw; - __uint32_t back; - __uint16_t magic; - __uint16_t count; - __uint16_t level; + uint32_t forw; + uint32_t back; + uint16_t magic; + uint16_t count; + uint16_t level; }; /* @@ -187,14 +187,14 @@ struct xfs_da3_icnode_hdr { /* * Byte offset in data block and shortform entry. */ -typedef __uint16_t xfs_dir2_data_off_t; +typedef uint16_t xfs_dir2_data_off_t; #define NULLDATAOFF 0xffffU typedef uint xfs_dir2_data_aoff_t; /* argument form */ /* * Offset in data space of a data entry. */ -typedef __uint32_t xfs_dir2_dataptr_t; +typedef uint32_t xfs_dir2_dataptr_t; #define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) #define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) @@ -206,7 +206,7 @@ typedef xfs_off_t xfs_dir2_off_t; /* * Directory block number (logical dirblk in file) */ -typedef __uint32_t xfs_dir2_db_t; +typedef uint32_t xfs_dir2_db_t; #define XFS_INO32_SIZE 4 #define XFS_INO64_SIZE 8 @@ -226,9 +226,9 @@ typedef __uint32_t xfs_dir2_db_t; * over them. */ typedef struct xfs_dir2_sf_hdr { - __uint8_t count; /* count of entries */ - __uint8_t i8count; /* count of 8-byte inode #s */ - __uint8_t parent[8]; /* parent dir inode number */ + uint8_t count; /* count of entries */ + uint8_t i8count; /* count of 8-byte inode #s */ + uint8_t parent[8]; /* parent dir inode number */ } __packed xfs_dir2_sf_hdr_t; typedef struct xfs_dir2_sf_entry { @@ -447,11 +447,11 @@ struct xfs_dir3_leaf_hdr { }; struct xfs_dir3_icleaf_hdr { - __uint32_t forw; - __uint32_t back; - __uint16_t magic; - __uint16_t count; - __uint16_t stale; + uint32_t forw; + uint32_t back; + uint16_t magic; + uint16_t count; + uint16_t stale; }; /* @@ -538,10 +538,10 @@ struct xfs_dir3_free { * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk. */ struct xfs_dir3_icfree_hdr { - __uint32_t magic; - __uint32_t firstdb; - __uint32_t nvalid; - __uint32_t nused; + uint32_t magic; + uint32_t firstdb; + uint32_t nvalid; + uint32_t nused; }; @@ -632,10 +632,10 @@ typedef struct xfs_attr_shortform { __u8 padding; } hdr; struct xfs_attr_sf_entry { - __uint8_t namelen; /* actual length of name (no NULL) */ - __uint8_t valuelen; /* actual length of value (no NULL) */ - __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ - __uint8_t nameval[1]; /* name & value bytes concatenated */ + uint8_t namelen; /* actual length of name (no NULL) */ + uint8_t valuelen; /* actual length of value (no NULL) */ + uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ + uint8_t nameval[1]; /* name & value bytes concatenated */ } list[1]; /* variable sized array */ } xfs_attr_shortform_t; @@ -725,22 +725,22 @@ struct xfs_attr3_leafblock { * incore, neutral version of the attribute leaf header */ struct xfs_attr3_icleaf_hdr { - __uint32_t forw; - __uint32_t back; - __uint16_t magic; - __uint16_t count; - __uint16_t usedbytes; + uint32_t forw; + uint32_t back; + uint16_t magic; + uint16_t count; + uint16_t usedbytes; /* * firstused is 32-bit here instead of 16-bit like the on-disk variant * to support maximum fsb size of 64k without overflow issues throughout * the attr code. Instead, the overflow condition is handled on * conversion to/from disk. */ - __uint32_t firstused; + uint32_t firstused; __u8 holes; struct { - __uint16_t base; - __uint16_t size; + uint16_t base; + uint16_t size; } freemap[XFS_ATTR_LEAF_MAPSIZE]; }; diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 2f389d366e93..ccf9783fd3f0 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -218,8 +218,7 @@ xfs_dir_ino_validate( agblkno != 0 && ioff < (1 << mp->m_sb.sb_inopblog) && XFS_AGINO_TO_INO(mp, agno, agino) == ino; - if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, - XFS_RANDOM_DIR_INO_VALIDATE))) { + if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) { xfs_warn(mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index d6e6d9d16f6c..21c8f8bf94d5 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -47,9 +47,9 @@ struct xfs_dir_ops { struct xfs_dir2_sf_entry * (*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep); - __uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep); + uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep); void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype); + uint8_t ftype); xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep); void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr, @@ -60,9 +60,9 @@ struct xfs_dir_ops { xfs_ino_t ino); int (*data_entsize)(int len); - __uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep); + uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep); void (*data_put_ftype)(struct xfs_dir2_data_entry *dep, - __uint8_t ftype); + uint8_t ftype); __be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep); struct xfs_dir2_data_free * (*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr); diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index aa17cb788946..43c902f7a68d 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -139,7 +139,7 @@ xfs_dir3_block_read( err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, XFS_DATA_FORK, &xfs_dir3_block_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index b887fb2a2bcf..27297a689d9c 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -145,7 +145,7 @@ xfs_dir3_leaf_check_int( static bool xfs_dir3_leaf_verify( struct xfs_buf *bp, - __uint16_t magic) + uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_dir2_leaf *leaf = bp->b_addr; @@ -154,7 +154,7 @@ xfs_dir3_leaf_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; - __uint16_t magic3; + uint16_t magic3; magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC : XFS_DIR3_LEAFN_MAGIC; @@ -178,7 +178,7 @@ xfs_dir3_leaf_verify( static void __read_verify( struct xfs_buf *bp, - __uint16_t magic) + uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; @@ -195,7 +195,7 @@ __read_verify( static void __write_verify( struct xfs_buf *bp, - __uint16_t magic) + uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; @@ -256,7 +256,7 @@ const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { .verify_write = xfs_dir3_leafn_write_verify, }; -static int +int xfs_dir3_leaf_read( struct xfs_trans *tp, struct xfs_inode *dp, @@ -268,7 +268,7 @@ xfs_dir3_leaf_read( err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF); return err; } @@ -285,7 +285,7 @@ xfs_dir3_leafn_read( err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF); return err; } @@ -299,7 +299,7 @@ xfs_dir3_leaf_init( struct xfs_trans *tp, struct xfs_buf *bp, xfs_ino_t owner, - __uint16_t type) + uint16_t type) { struct xfs_dir2_leaf *leaf = bp->b_addr; @@ -343,7 +343,7 @@ xfs_dir3_leaf_get_buf( xfs_da_args_t *args, xfs_dir2_db_t bno, struct xfs_buf **bpp, - __uint16_t magic) + uint16_t magic) { struct xfs_inode *dp = args->dp; struct xfs_trans *tp = args->trans; diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 75a557432d0f..682e2bf370c7 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { .verify_write = xfs_dir3_free_write_verify, }; +/* Everything ok in the free block header? */ +static bool +xfs_dir3_free_header_check( + struct xfs_inode *dp, + xfs_dablk_t fbno, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = dp->i_mount; + unsigned int firstdb; + int maxbests; + + maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); + firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - + xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * + maxbests; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; + + if (be32_to_cpu(hdr3->firstdb) != firstdb) + return false; + if (be32_to_cpu(hdr3->nvalid) > maxbests) + return false; + if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) + return false; + } else { + struct xfs_dir2_free_hdr *hdr = bp->b_addr; + + if (be32_to_cpu(hdr->firstdb) != firstdb) + return false; + if (be32_to_cpu(hdr->nvalid) > maxbests) + return false; + if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) + return false; + } + return true; +} static int __xfs_dir3_free_read( @@ -168,11 +204,22 @@ __xfs_dir3_free_read( err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, XFS_DATA_FORK, &xfs_dir3_free_buf_ops); + if (err || !*bpp) + return err; + + /* Check things that we can't do in the verifier. */ + if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { + xfs_buf_ioerror(*bpp, -EFSCORRUPTED); + xfs_verifier_error(*bpp); + xfs_trans_brelse(tp, *bpp); + return -EFSCORRUPTED; + } /* try read returns without an error or *bpp if it lands in a hole */ - if (!err && tp && *bpp) + if (tp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); - return err; + + return 0; } int @@ -481,7 +528,7 @@ xfs_dir2_free_hdr_check( * Stale entries are ok. */ xfs_dahash_t /* hash value */ -xfs_dir2_leafn_lasthash( +xfs_dir2_leaf_lasthash( struct xfs_inode *dp, struct xfs_buf *bp, /* leaf buffer */ int *count) /* count of entries in leaf */ @@ -493,7 +540,9 @@ xfs_dir2_leafn_lasthash( dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || - leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); + leafhdr.magic == XFS_DIR3_LEAFN_MAGIC || + leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || + leafhdr.magic == XFS_DIR3_LEAF1_MAGIC); if (count) *count = leafhdr.count; @@ -1358,8 +1407,8 @@ xfs_dir2_leafn_split( /* * Update last hashval in each block since we added the name. */ - oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL); - newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL); + oldblk->hashval = xfs_dir2_leaf_lasthash(dp, oldblk->bp, NULL); + newblk->hashval = xfs_dir2_leaf_lasthash(dp, newblk->bp, NULL); xfs_dir3_leaf_check(dp, oldblk->bp); xfs_dir3_leaf_check(dp, newblk->bp); return error; diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index d04547fcf274..4badd26c47e6 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -58,6 +58,8 @@ extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, struct xfs_buf **bpp); /* xfs_dir2_leaf.c */ +extern int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, @@ -69,7 +71,7 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_dir2_leaf_entry *ents, int *indexp, int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, - struct xfs_buf **bpp, __uint16_t magic); + struct xfs_buf **bpp, uint16_t magic); extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, struct xfs_buf *bp, int first, int last); extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args, @@ -93,7 +95,7 @@ extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_buf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp, +extern xfs_dahash_t xfs_dir2_leaf_lasthash(struct xfs_inode *dp, struct xfs_buf *bp, int *count); extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, struct xfs_da_args *args, int *indexp, @@ -125,9 +127,10 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); +extern int xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ -extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, - size_t bufsize); +extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, + struct dir_context *ctx, size_t bufsize); #endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index c6809ff41197..be8b9755f66a 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -629,6 +629,112 @@ xfs_dir2_sf_check( } #endif /* DEBUG */ +/* Verify the consistency of an inline directory. */ +int +xfs_dir2_sf_verify( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_dir2_sf_hdr *sfp; + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + char *endp; + const struct xfs_dir_ops *dops; + struct xfs_ifork *ifp; + xfs_ino_t ino; + int i; + int i8count; + int offset; + int size; + int error; + uint8_t filetype; + + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + /* + * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops, + * so we can only trust the mountpoint to have the right pointer. + */ + dops = xfs_dir_get_ops(mp, NULL); + + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; + size = ifp->if_bytes; + + /* + * Give up if the directory is way too short. + */ + if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || + size < xfs_dir2_sf_hdr_size(sfp->i8count)) + return -EFSCORRUPTED; + + endp = (char *)sfp + size; + + /* Check .. entry */ + ino = dops->sf_get_parent_ino(sfp); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; + offset = dops->data_first_offset; + + /* Check all reported entries */ + sfep = xfs_dir2_sf_firstentry(sfp); + for (i = 0; i < sfp->count; i++) { + /* + * struct xfs_dir2_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + if (((char *)sfep + sizeof(*sfep)) >= endp) + return -EFSCORRUPTED; + + /* Don't allow names with known bad length. */ + if (sfep->namelen == 0) + return -EFSCORRUPTED; + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = dops->sf_nextentry(sfp, sfep); + if (endp < (char *)next_sfep) + return -EFSCORRUPTED; + + /* Check that the offsets always increase. */ + if (xfs_dir2_sf_get_offset(sfep) < offset) + return -EFSCORRUPTED; + + /* Check the inode number. */ + ino = dops->sf_get_ino(sfp, sfep); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; + + /* Check the file type. */ + filetype = dops->sf_get_ftype(sfep); + if (filetype >= XFS_DIR3_FT_MAX) + return -EFSCORRUPTED; + + offset = xfs_dir2_sf_get_offset(sfep) + + dops->data_entsize(sfep->namelen); + + sfep = next_sfep; + } + if (i8count != sfp->i8count) + return -EFSCORRUPTED; + if ((void *)sfep != (void *)endp) + return -EFSCORRUPTED; + + /* Make sure this whole thing ought to be in local format. */ + if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) + return -EFSCORRUPTED; + + return 0; +} + /* * Create a new (shortform) directory. */ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index ac9a003dd29a..747085b4ef44 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -35,13 +35,8 @@ int xfs_calc_dquots_per_chunk( unsigned int nbblks) /* basic block units */ { - unsigned int ndquots; - ASSERT(nbblks > 0); - ndquots = BBTOB(nbblks); - do_div(ndquots, sizeof(xfs_dqblk_t)); - - return ndquots; + return BBTOB(nbblks) / sizeof(xfs_dqblk_t); } /* diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 6b7579e7b60a..23229f0c5b15 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -103,8 +103,8 @@ struct xfs_ifork; * Must be padded to 64 bit alignment. */ typedef struct xfs_sb { - __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ - __uint32_t sb_blocksize; /* logical block size, bytes */ + uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ + uint32_t sb_blocksize; /* logical block size, bytes */ xfs_rfsblock_t sb_dblocks; /* number of data blocks */ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ xfs_rtblock_t sb_rextents; /* number of realtime extents */ @@ -118,45 +118,45 @@ typedef struct xfs_sb { xfs_agnumber_t sb_agcount; /* number of allocation groups */ xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */ xfs_extlen_t sb_logblocks; /* number of log blocks */ - __uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */ - __uint16_t sb_sectsize; /* volume sector size, bytes */ - __uint16_t sb_inodesize; /* inode size, bytes */ - __uint16_t sb_inopblock; /* inodes per block */ + uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */ + uint16_t sb_sectsize; /* volume sector size, bytes */ + uint16_t sb_inodesize; /* inode size, bytes */ + uint16_t sb_inopblock; /* inodes per block */ char sb_fname[12]; /* file system name */ - __uint8_t sb_blocklog; /* log2 of sb_blocksize */ - __uint8_t sb_sectlog; /* log2 of sb_sectsize */ - __uint8_t sb_inodelog; /* log2 of sb_inodesize */ - __uint8_t sb_inopblog; /* log2 of sb_inopblock */ - __uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */ - __uint8_t sb_rextslog; /* log2 of sb_rextents */ - __uint8_t sb_inprogress; /* mkfs is in progress, don't mount */ - __uint8_t sb_imax_pct; /* max % of fs for inode space */ + uint8_t sb_blocklog; /* log2 of sb_blocksize */ + uint8_t sb_sectlog; /* log2 of sb_sectsize */ + uint8_t sb_inodelog; /* log2 of sb_inodesize */ + uint8_t sb_inopblog; /* log2 of sb_inopblock */ + uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */ + uint8_t sb_rextslog; /* log2 of sb_rextents */ + uint8_t sb_inprogress; /* mkfs is in progress, don't mount */ + uint8_t sb_imax_pct; /* max % of fs for inode space */ /* statistics */ /* * These fields must remain contiguous. If you really * want to change their layout, make sure you fix the * code in xfs_trans_apply_sb_deltas(). */ - __uint64_t sb_icount; /* allocated inodes */ - __uint64_t sb_ifree; /* free inodes */ - __uint64_t sb_fdblocks; /* free data blocks */ - __uint64_t sb_frextents; /* free realtime extents */ + uint64_t sb_icount; /* allocated inodes */ + uint64_t sb_ifree; /* free inodes */ + uint64_t sb_fdblocks; /* free data blocks */ + uint64_t sb_frextents; /* free realtime extents */ /* * End contiguous fields. */ xfs_ino_t sb_uquotino; /* user quota inode */ xfs_ino_t sb_gquotino; /* group quota inode */ - __uint16_t sb_qflags; /* quota flags */ - __uint8_t sb_flags; /* misc. flags */ - __uint8_t sb_shared_vn; /* shared version number */ + uint16_t sb_qflags; /* quota flags */ + uint8_t sb_flags; /* misc. flags */ + uint8_t sb_shared_vn; /* shared version number */ xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */ - __uint32_t sb_unit; /* stripe or raid unit */ - __uint32_t sb_width; /* stripe or raid width */ - __uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */ - __uint8_t sb_logsectlog; /* log2 of the log sector size */ - __uint16_t sb_logsectsize; /* sector size for the log, bytes */ - __uint32_t sb_logsunit; /* stripe unit size for the log */ - __uint32_t sb_features2; /* additional feature bits */ + uint32_t sb_unit; /* stripe or raid unit */ + uint32_t sb_width; /* stripe or raid width */ + uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */ + uint8_t sb_logsectlog; /* log2 of the log sector size */ + uint16_t sb_logsectsize; /* sector size for the log, bytes */ + uint32_t sb_logsunit; /* stripe unit size for the log */ + uint32_t sb_features2; /* additional feature bits */ /* * bad features2 field as a result of failing to pad the sb structure to @@ -167,17 +167,17 @@ typedef struct xfs_sb { * the value in sb_features2 when formatting the incore superblock to * the disk buffer. */ - __uint32_t sb_bad_features2; + uint32_t sb_bad_features2; /* version 5 superblock fields start here */ /* feature masks */ - __uint32_t sb_features_compat; - __uint32_t sb_features_ro_compat; - __uint32_t sb_features_incompat; - __uint32_t sb_features_log_incompat; + uint32_t sb_features_compat; + uint32_t sb_features_ro_compat; + uint32_t sb_features_incompat; + uint32_t sb_features_log_incompat; - __uint32_t sb_crc; /* superblock crc */ + uint32_t sb_crc; /* superblock crc */ xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */ xfs_ino_t sb_pquotino; /* project quota inode */ @@ -449,7 +449,7 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) static inline bool xfs_sb_has_compat_feature( struct xfs_sb *sbp, - __uint32_t feature) + uint32_t feature) { return (sbp->sb_features_compat & feature) != 0; } @@ -465,7 +465,7 @@ xfs_sb_has_compat_feature( static inline bool xfs_sb_has_ro_compat_feature( struct xfs_sb *sbp, - __uint32_t feature) + uint32_t feature) { return (sbp->sb_features_ro_compat & feature) != 0; } @@ -482,7 +482,7 @@ xfs_sb_has_ro_compat_feature( static inline bool xfs_sb_has_incompat_feature( struct xfs_sb *sbp, - __uint32_t feature) + uint32_t feature) { return (sbp->sb_features_incompat & feature) != 0; } @@ -492,7 +492,7 @@ xfs_sb_has_incompat_feature( static inline bool xfs_sb_has_incompat_log_feature( struct xfs_sb *sbp, - __uint32_t feature) + uint32_t feature) { return (sbp->sb_features_log_incompat & feature) != 0; } @@ -594,8 +594,8 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) */ #define XFS_FSB_TO_B(mp,fsbno) ((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog) #define XFS_B_TO_FSB(mp,b) \ - ((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) -#define XFS_B_TO_FSBT(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) + ((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) +#define XFS_B_TO_FSBT(mp,b) (((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) #define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask) /* @@ -930,10 +930,8 @@ static inline uint xfs_dinode_size(int version) /* * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. * Since the pathconf interface is signed, we use 2^31 - 1 instead. - * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. */ #define XFS_MAXLINK ((1U << 31) - 1U) -#define XFS_MAXLINK_1 65535U /* * Values for di_format @@ -1074,7 +1072,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) * next agno_log bits - ag number * high agno_log-agblklog-inopblog bits - 0 */ -#define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) +#define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1) #define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog #define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog #define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log @@ -1213,6 +1211,7 @@ struct xfs_dsymlink_hdr { #define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) +#define XFS_SYMLINK_MAXLEN 1024 /* * The maximum pathlen is 1024 bytes. Since the minimum file system * blocksize is 512 bytes, we can get a max of 3 extents back from @@ -1271,16 +1270,16 @@ typedef __be32 xfs_alloc_ptr_t; #define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */ #define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */ -typedef __uint64_t xfs_inofree_t; +typedef uint64_t xfs_inofree_t; #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) #define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) #define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) #define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) #define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */ -#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t)) +#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(uint16_t)) #define XFS_INODES_PER_HOLEMASK_BIT \ - (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t))) + (XFS_INODES_PER_CHUNK / (NBBY * sizeof(uint16_t))) static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) { @@ -1314,9 +1313,9 @@ typedef struct xfs_inobt_rec { typedef struct xfs_inobt_rec_incore { xfs_agino_t ir_startino; /* starting inode number */ - __uint16_t ir_holemask; /* hole mask for sparse chunks */ - __uint8_t ir_count; /* total inode count */ - __uint8_t ir_freecount; /* count of free inodes (set bits) */ + uint16_t ir_holemask; /* hole mask for sparse chunks */ + uint8_t ir_count; /* total inode count */ + uint8_t ir_freecount; /* count of free inodes (set bits) */ xfs_inofree_t ir_free; /* free inode mask */ } xfs_inobt_rec_incore_t; @@ -1399,15 +1398,15 @@ struct xfs_rmap_rec { * rm_offset:54-60 aren't used and should be zero * rm_offset:0-53 is the block offset within the inode */ -#define XFS_RMAP_OFF_ATTR_FORK ((__uint64_t)1ULL << 63) -#define XFS_RMAP_OFF_BMBT_BLOCK ((__uint64_t)1ULL << 62) -#define XFS_RMAP_OFF_UNWRITTEN ((__uint64_t)1ULL << 61) +#define XFS_RMAP_OFF_ATTR_FORK ((uint64_t)1ULL << 63) +#define XFS_RMAP_OFF_BMBT_BLOCK ((uint64_t)1ULL << 62) +#define XFS_RMAP_OFF_UNWRITTEN ((uint64_t)1ULL << 61) -#define XFS_RMAP_LEN_MAX ((__uint32_t)~0U) +#define XFS_RMAP_LEN_MAX ((uint32_t)~0U) #define XFS_RMAP_OFF_FLAGS (XFS_RMAP_OFF_ATTR_FORK | \ XFS_RMAP_OFF_BMBT_BLOCK | \ XFS_RMAP_OFF_UNWRITTEN) -#define XFS_RMAP_OFF_MASK ((__uint64_t)0x3FFFFFFFFFFFFFULL) +#define XFS_RMAP_OFF_MASK ((uint64_t)0x3FFFFFFFFFFFFFULL) #define XFS_RMAP_OFF(off) ((off) & XFS_RMAP_OFF_MASK) @@ -1433,8 +1432,8 @@ struct xfs_rmap_rec { struct xfs_rmap_irec { xfs_agblock_t rm_startblock; /* extent start block */ xfs_extlen_t rm_blockcount; /* extent length */ - __uint64_t rm_owner; /* extent owner */ - __uint64_t rm_offset; /* offset within the owner */ + uint64_t rm_owner; /* extent owner */ + uint64_t rm_offset; /* offset within the owner */ unsigned int rm_flags; /* state flags */ }; @@ -1546,11 +1545,11 @@ typedef struct xfs_bmbt_rec { __be64 l0, l1; } xfs_bmbt_rec_t; -typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ +typedef uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; typedef struct xfs_bmbt_rec_host { - __uint64_t l0, l1; + uint64_t l0, l1; } xfs_bmbt_rec_host_t; /* @@ -1578,19 +1577,10 @@ static inline xfs_filblks_t startblockval(xfs_fsblock_t x) } /* - * Possible extent formats. - */ -typedef enum { - XFS_EXTFMT_NOSTATE = 0, - XFS_EXTFMT_HASSTATE -} xfs_exntfmt_t; - -/* * Possible extent states. */ typedef enum { XFS_EXT_NORM, XFS_EXT_UNWRITTEN, - XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID } xfs_exntst_t; /* diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index b72dc821d78b..8c61f21535d4 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -92,6 +92,18 @@ struct getbmapx { #define BMV_OF_LAST 0x4 /* segment is the last in the file */ #define BMV_OF_SHARED 0x8 /* segment shared with another file */ +/* fmr_owner special values for FS_IOC_GETFSMAP */ +#define XFS_FMR_OWN_FREE FMR_OWN_FREE /* free space */ +#define XFS_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */ +#define XFS_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */ +#define XFS_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */ +#define XFS_FMR_OWN_AG FMR_OWNER('X', 3) /* per-AG metadata */ +#define XFS_FMR_OWN_INOBT FMR_OWNER('X', 4) /* inode btree blocks */ +#define XFS_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */ +#define XFS_FMR_OWN_REFC FMR_OWNER('X', 6) /* refcount tree */ +#define XFS_FMR_OWN_COW FMR_OWNER('X', 7) /* cow staging */ +#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */ + /* * Structure for XFS_IOC_FSSETDM. * For use by backup and restore programs to set the XFS on-disk inode @@ -290,10 +302,10 @@ typedef struct xfs_bstat { * and using two 16bit values to hold new 32bit projid was choosen * to retain compatibility with "old" filesystems). */ -static inline __uint32_t +static inline uint32_t bstat_get_projid(struct xfs_bstat *bs) { - return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; + return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; } /* @@ -434,19 +446,15 @@ typedef struct xfs_handle { } xfs_handle_t; #define ha_fsid ha_u._ha_fsid -#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.fid_pad \ - - (char *) &(handle)) \ - + (handle).ha_fid.fid_len) - /* * Structure passed to XFS_IOC_SWAPEXT */ typedef struct xfs_swapext { - __int64_t sx_version; /* version */ + int64_t sx_version; /* version */ #define XFS_SX_VERSION 0 - __int64_t sx_fdtarget; /* fd of target file */ - __int64_t sx_fdtmp; /* fd of tmp file */ + int64_t sx_fdtarget; /* fd of target file */ + int64_t sx_fdtmp; /* fd of tmp file */ xfs_off_t sx_offset; /* offset into file */ xfs_off_t sx_length; /* leng from offset */ char sx_pad[16]; /* pad space, unused */ @@ -502,6 +510,7 @@ typedef struct xfs_swapext #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) #define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) +/* XFS_IOC_GETFSMAP ------ hoisted 59 */ /* * ioctl commands that replace IRIX syssgi()'s @@ -533,7 +542,7 @@ typedef struct xfs_swapext #define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq) #define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq) #define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom) -#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t) +#define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index f272abff11e1..ffd5a15d1bb6 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -46,13 +46,12 @@ /* * Allocation group level functions. */ -static inline int +int xfs_ialloc_cluster_alignment( struct xfs_mount *mp) { if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) return mp->m_sb.sb_inoalignmt; return 1; } @@ -99,24 +98,15 @@ xfs_inobt_update( return xfs_btree_update(cur, &rec); } -/* - * Get the data from the pointed-to record. - */ -int /* error */ -xfs_inobt_get_rec( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_inobt_rec_incore_t *irec, /* btree record */ - int *stat) /* output: success/failure */ +/* Convert on-disk btree record to incore inobt record. */ +void +xfs_inobt_btrec_to_irec( + struct xfs_mount *mp, + union xfs_btree_rec *rec, + struct xfs_inobt_rec_incore *irec) { - union xfs_btree_rec *rec; - int error; - - error = xfs_btree_get_rec(cur, &rec, stat); - if (error || *stat == 0) - return error; - irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); - if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { + if (xfs_sb_version_hassparseinodes(&mp->m_sb)) { irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask); irec->ir_count = rec->inobt.ir_u.sp.ir_count; irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount; @@ -131,6 +121,25 @@ xfs_inobt_get_rec( be32_to_cpu(rec->inobt.ir_u.f.ir_freecount); } irec->ir_free = be64_to_cpu(rec->inobt.ir_free); +} + +/* + * Get the data from the pointed-to record. + */ +int +xfs_inobt_get_rec( + struct xfs_btree_cur *cur, + struct xfs_inobt_rec_incore *irec, + int *stat) +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (error || *stat == 0) + return error; + + xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec); return 0; } @@ -141,9 +150,9 @@ xfs_inobt_get_rec( STATIC int xfs_inobt_insert_rec( struct xfs_btree_cur *cur, - __uint16_t holemask, - __uint8_t count, - __int32_t freecount, + uint16_t holemask, + uint8_t count, + int32_t freecount, xfs_inofree_t free, int *stat) { @@ -2543,8 +2552,7 @@ xfs_agi_read_verify( !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) xfs_buf_ioerror(bp, -EFSBADCRC); else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, - XFS_ERRTAG_IALLOC_READ_AGI, - XFS_RANDOM_IALLOC_READ_AGI)) + XFS_ERRTAG_IALLOC_READ_AGI)) xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 0bb89669fc07..b32cfb5aeb5b 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -168,5 +168,10 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); +union xfs_btree_rec; +void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, + struct xfs_inobt_rec_incore *irec); + +int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 7c471881c9a6..317caba9faa6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -175,6 +175,18 @@ xfs_inobt_init_key_from_rec( } STATIC void +xfs_inobt_init_high_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + __u32 x; + + x = be32_to_cpu(rec->inobt.ir_startino); + x += XFS_INODES_PER_CHUNK - 1; + key->inobt.ir_startino = cpu_to_be32(x); +} + +STATIC void xfs_inobt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) @@ -219,15 +231,25 @@ xfs_finobt_init_ptr_from_cur( ptr->s = agi->agi_free_root; } -STATIC __int64_t +STATIC int64_t xfs_inobt_key_diff( struct xfs_btree_cur *cur, union xfs_btree_key *key) { - return (__int64_t)be32_to_cpu(key->inobt.ir_startino) - + return (int64_t)be32_to_cpu(key->inobt.ir_startino) - cur->bc_rec.i.ir_startino; } +STATIC int64_t +xfs_inobt_diff_two_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return (int64_t)be32_to_cpu(k1->inobt.ir_startino) - + be32_to_cpu(k2->inobt.ir_startino); +} + static int xfs_inobt_verify( struct xfs_buf *bp) @@ -302,7 +324,6 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = { .verify_write = xfs_inobt_write_verify, }; -#if defined(DEBUG) || defined(XFS_WARN) STATIC int xfs_inobt_keys_inorder( struct xfs_btree_cur *cur, @@ -322,7 +343,6 @@ xfs_inobt_recs_inorder( return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= be32_to_cpu(r2->inobt.ir_startino); } -#endif /* DEBUG */ static const struct xfs_btree_ops xfs_inobt_ops = { .rec_len = sizeof(xfs_inobt_rec_t), @@ -335,14 +355,14 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, .buf_ops = &xfs_inobt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) + .diff_two_keys = xfs_inobt_diff_two_keys, .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, -#endif }; static const struct xfs_btree_ops xfs_finobt_ops = { @@ -356,14 +376,14 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, .buf_ops = &xfs_inobt_buf_ops, -#if defined(DEBUG) || defined(XFS_WARN) + .diff_two_keys = xfs_inobt_diff_two_keys, .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, -#endif }; /* diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index d93f9d918cfc..378f8fbc91a7 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -105,8 +105,7 @@ xfs_inode_buf_verify( di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && xfs_dinode_good_version(mp, dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, - XFS_ERRTAG_ITOBP_INOTOBP, - XFS_RANDOM_ITOBP_INOTOBP))) { + XFS_ERRTAG_ITOBP_INOTOBP))) { if (readahead) { bp->b_flags &= ~XBF_DONE; xfs_buf_ioerror(bp, -EIO); @@ -381,7 +380,7 @@ xfs_log_dinode_to_disk( } } -static bool +bool xfs_dinode_verify( struct xfs_mount *mp, xfs_ino_t ino, @@ -444,7 +443,7 @@ xfs_dinode_calc_crc( struct xfs_mount *mp, struct xfs_dinode *dip) { - __uint32_t crc; + uint32_t crc; if (dip->di_version < 3) return; @@ -508,7 +507,7 @@ xfs_iread( /* even unallocated inodes are verified */ if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { - xfs_alert(mp, "%s: validation failed for inode %lld failed", + xfs_alert(mp, "%s: validation failed for inode %lld", __func__, ip->i_ino); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 6848a0afbce7..a9c97a356c30 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -28,26 +28,26 @@ struct xfs_dinode; * format specific structures at the appropriate time. */ struct xfs_icdinode { - __int8_t di_version; /* inode version */ - __int8_t di_format; /* format of di_c data */ - __uint16_t di_flushiter; /* incremented on flush */ - __uint32_t di_uid; /* owner's user id */ - __uint32_t di_gid; /* owner's group id */ - __uint16_t di_projid_lo; /* lower part of owner's project id */ - __uint16_t di_projid_hi; /* higher part of owner's project id */ + int8_t di_version; /* inode version */ + int8_t di_format; /* format of di_c data */ + uint16_t di_flushiter; /* incremented on flush */ + uint32_t di_uid; /* owner's user id */ + uint32_t di_gid; /* owner's group id */ + uint16_t di_projid_lo; /* lower part of owner's project id */ + uint16_t di_projid_hi; /* higher part of owner's project id */ xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ xfs_extnum_t di_nextents; /* number of extents in data fork */ xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ - __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ - __int8_t di_aformat; /* format of attr fork's data */ - __uint32_t di_dmevmask; /* DMIG event mask */ - __uint16_t di_dmstate; /* DMIG state info */ - __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ + uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ + int8_t di_aformat; /* format of attr fork's data */ + uint32_t di_dmevmask; /* DMIG event mask */ + uint16_t di_dmstate; /* DMIG state info */ + uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ - __uint64_t di_flags2; /* more random flags */ - __uint32_t di_cowextsize; /* basic cow extent size for file */ + uint64_t di_flags2; /* more random flags */ + uint32_t di_cowextsize; /* basic cow extent size for file */ xfs_ictimestamp_t di_crtime; /* time created */ }; @@ -82,4 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ +bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, + struct xfs_dinode *dip); + #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 222e103356c6..0e80f34fe97c 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -26,12 +26,15 @@ #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" +#include "xfs_btree.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_attr_sf.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_ifork_zone; @@ -39,35 +42,6 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); -#ifdef DEBUG -/* - * Make sure that the extents in the given memory buffer - * are valid. - */ -void -xfs_validate_extents( - xfs_ifork_t *ifp, - int nrecs, - xfs_exntfmt_t fmt) -{ - xfs_bmbt_irec_t irec; - xfs_bmbt_rec_host_t rec; - int i; - - for (i = 0; i < nrecs; i++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - rec.l0 = get_unaligned(&ep->l0); - rec.l1 = get_unaligned(&ep->l1); - xfs_bmbt_get_all(&rec, &irec); - if (fmt == XFS_EXTFMT_NOSTATE) - ASSERT(irec.br_state == XFS_EXT_NORM); - } -} -#else /* DEBUG */ -#define xfs_validate_extents(ifp, nrecs, fmt) -#endif /* DEBUG */ - - /* * Move inode type and inode format specific information from the * on-disk inode to the in-core inode. For fifos, devs, and sockets @@ -209,6 +183,16 @@ xfs_iformat_fork( if (error) return error; + /* Check inline dir contents. */ + if (S_ISDIR(VFS_I(ip)->i_mode) && + dip->di_format == XFS_DINODE_FMT_LOCAL) { + error = xfs_dir2_sf_verify(ip); + if (error) { + xfs_idestroy_fork(ip, XFS_DATA_FORK); + return error; + } + } + if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -319,7 +303,6 @@ xfs_iformat_local( int whichfork, int size) { - /* * If the size is unreasonable, then something * is wrong and we just bail out rather than crash in @@ -340,40 +323,33 @@ xfs_iformat_local( } /* - * The file consists of a set of extents all - * of which fit into the on-disk inode. - * If there are few enough extents to fit into - * the if_inline_ext, then copy them there. - * Otherwise allocate a buffer for them and copy - * them into it. Either way, set if_extents - * to point at the extents. + * The file consists of a set of extents all of which fit into the on-disk + * inode. If there are few enough extents to fit into the if_inline_ext, then + * copy them there. Otherwise allocate a buffer for them and copy them into it. + * Either way, set if_extents to point at the extents. */ STATIC int xfs_iformat_extents( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork) + struct xfs_inode *ip, + struct xfs_dinode *dip, + int whichfork) { - xfs_bmbt_rec_t *dp; - xfs_ifork_t *ifp; - int nex; - int size; - int i; - - ifp = XFS_IFORK_PTR(ip, whichfork); - nex = XFS_DFORK_NEXTENTS(dip, whichfork); - size = nex * (uint)sizeof(xfs_bmbt_rec_t); + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + int nex = XFS_DFORK_NEXTENTS(dip, whichfork); + int size = nex * sizeof(xfs_bmbt_rec_t); + struct xfs_bmbt_rec *dp; + int i; /* - * If the number of extents is unreasonable, then something - * is wrong and we just bail out rather than crash in - * kmem_alloc() or memcpy() below. + * If the number of extents is unreasonable, then something is wrong and + * we just bail out rather than crash in kmem_alloc() or memcpy() below. */ - if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { + if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", (unsigned long long) ip->i_ino, nex); XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); + mp, dip); return -EFSCORRUPTED; } @@ -388,22 +364,17 @@ xfs_iformat_extents( ifp->if_bytes = size; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); - xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); for (i = 0; i < nex; i++, dp++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); ep->l0 = get_unaligned_be64(&dp->l0); ep->l1 = get_unaligned_be64(&dp->l1); + if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) { + XFS_ERROR_REPORT("xfs_iformat_extents(2)", + XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } } XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); - if (whichfork != XFS_DATA_FORK || - XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) - if (unlikely(xfs_check_nostate_extents( - ifp, 0, nex))) { - XFS_ERROR_REPORT("xfs_iformat_extents(2)", - XFS_ERRLEVEL_LOW, - ip->i_mount); - return -EFSCORRUPTED; - } } ifp->if_flags |= XFS_IFEXTENTS; return 0; @@ -429,11 +400,13 @@ xfs_iformat_btree( /* REFERENCED */ int nrecs; int size; + int level; ifp = XFS_IFORK_PTR(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); size = XFS_BMAP_BROOT_SPACE(mp, dfp); nrecs = be16_to_cpu(dfp->bb_numrecs); + level = be16_to_cpu(dfp->bb_level); /* * blow out if -- fork has less extents than can fit in @@ -446,7 +419,8 @@ xfs_iformat_btree( XFS_IFORK_MAXEXT(ip, whichfork) || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { + XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || + level == 0 || level > XFS_BTREE_MAXLEVELS) { xfs_warn(mp, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, @@ -497,15 +471,13 @@ xfs_iread_extents( * We know that the size is valid (it's checked in iformat_btree) */ ifp->if_bytes = ifp->if_real_bytes = 0; - ifp->if_flags |= XFS_IFEXTENTS; xfs_iext_add(ifp, 0, nextents); error = xfs_bmap_read_extents(tp, ip, whichfork); if (error) { xfs_iext_destroy(ifp); - ifp->if_flags &= ~XFS_IFEXTENTS; return error; } - xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); + ifp->if_flags |= XFS_IFEXTENTS; return 0; } /* @@ -823,6 +795,9 @@ xfs_iextents_copy( copied = 0; for (i = 0; i < nrecs; i++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); + + ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep)); + start_block = xfs_bmbt_get_startblock(ep); if (isnullstartblock(start_block)) { /* @@ -838,7 +813,6 @@ xfs_iextents_copy( copied++; } ASSERT(copied != 0); - xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); return (copied * (uint)sizeof(xfs_bmbt_rec_t)); } diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 7ae571f8e34a..8372e9bcd7b6 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -31,7 +31,7 @@ struct xfs_trans_res; * through all the log items definitions and everything they encode into the * log. */ -typedef __uint32_t xlog_tid_t; +typedef uint32_t xlog_tid_t; #define XLOG_MIN_ICLOGS 2 #define XLOG_MAX_ICLOGS 8 @@ -211,7 +211,7 @@ typedef struct xfs_log_iovec { typedef struct xfs_trans_header { uint th_magic; /* magic number */ uint th_type; /* transaction type */ - __int32_t th_tid; /* transaction id (unused) */ + int32_t th_tid; /* transaction id (unused) */ uint th_num_items; /* num items logged by trans */ } xfs_trans_header_t; @@ -265,52 +265,52 @@ typedef struct xfs_trans_header { * must be added on to the end. */ typedef struct xfs_inode_log_format { - __uint16_t ilf_type; /* inode log item type */ - __uint16_t ilf_size; /* size of this item */ - __uint32_t ilf_fields; /* flags for fields logged */ - __uint16_t ilf_asize; /* size of attr d/ext/root */ - __uint16_t ilf_dsize; /* size of data/ext/root */ - __uint64_t ilf_ino; /* inode number */ + uint16_t ilf_type; /* inode log item type */ + uint16_t ilf_size; /* size of this item */ + uint32_t ilf_fields; /* flags for fields logged */ + uint16_t ilf_asize; /* size of attr d/ext/root */ + uint16_t ilf_dsize; /* size of data/ext/root */ + uint64_t ilf_ino; /* inode number */ union { - __uint32_t ilfu_rdev; /* rdev value for dev inode*/ + uint32_t ilfu_rdev; /* rdev value for dev inode*/ uuid_t ilfu_uuid; /* mount point value */ } ilf_u; - __int64_t ilf_blkno; /* blkno of inode buffer */ - __int32_t ilf_len; /* len of inode buffer */ - __int32_t ilf_boffset; /* off of inode in buffer */ + int64_t ilf_blkno; /* blkno of inode buffer */ + int32_t ilf_len; /* len of inode buffer */ + int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_t; typedef struct xfs_inode_log_format_32 { - __uint16_t ilf_type; /* inode log item type */ - __uint16_t ilf_size; /* size of this item */ - __uint32_t ilf_fields; /* flags for fields logged */ - __uint16_t ilf_asize; /* size of attr d/ext/root */ - __uint16_t ilf_dsize; /* size of data/ext/root */ - __uint64_t ilf_ino; /* inode number */ + uint16_t ilf_type; /* inode log item type */ + uint16_t ilf_size; /* size of this item */ + uint32_t ilf_fields; /* flags for fields logged */ + uint16_t ilf_asize; /* size of attr d/ext/root */ + uint16_t ilf_dsize; /* size of data/ext/root */ + uint64_t ilf_ino; /* inode number */ union { - __uint32_t ilfu_rdev; /* rdev value for dev inode*/ + uint32_t ilfu_rdev; /* rdev value for dev inode*/ uuid_t ilfu_uuid; /* mount point value */ } ilf_u; - __int64_t ilf_blkno; /* blkno of inode buffer */ - __int32_t ilf_len; /* len of inode buffer */ - __int32_t ilf_boffset; /* off of inode in buffer */ + int64_t ilf_blkno; /* blkno of inode buffer */ + int32_t ilf_len; /* len of inode buffer */ + int32_t ilf_boffset; /* off of inode in buffer */ } __attribute__((packed)) xfs_inode_log_format_32_t; typedef struct xfs_inode_log_format_64 { - __uint16_t ilf_type; /* inode log item type */ - __uint16_t ilf_size; /* size of this item */ - __uint32_t ilf_fields; /* flags for fields logged */ - __uint16_t ilf_asize; /* size of attr d/ext/root */ - __uint16_t ilf_dsize; /* size of data/ext/root */ - __uint32_t ilf_pad; /* pad for 64 bit boundary */ - __uint64_t ilf_ino; /* inode number */ + uint16_t ilf_type; /* inode log item type */ + uint16_t ilf_size; /* size of this item */ + uint32_t ilf_fields; /* flags for fields logged */ + uint16_t ilf_asize; /* size of attr d/ext/root */ + uint16_t ilf_dsize; /* size of data/ext/root */ + uint32_t ilf_pad; /* pad for 64 bit boundary */ + uint64_t ilf_ino; /* inode number */ union { - __uint32_t ilfu_rdev; /* rdev value for dev inode*/ + uint32_t ilfu_rdev; /* rdev value for dev inode*/ uuid_t ilfu_uuid; /* mount point value */ } ilf_u; - __int64_t ilf_blkno; /* blkno of inode buffer */ - __int32_t ilf_len; /* len of inode buffer */ - __int32_t ilf_boffset; /* off of inode in buffer */ + int64_t ilf_blkno; /* blkno of inode buffer */ + int32_t ilf_len; /* len of inode buffer */ + int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_64_t; @@ -379,8 +379,8 @@ static inline int xfs_ilog_fdata(int w) * information. */ typedef struct xfs_ictimestamp { - __int32_t t_sec; /* timestamp seconds */ - __int32_t t_nsec; /* timestamp nanoseconds */ + int32_t t_sec; /* timestamp seconds */ + int32_t t_nsec; /* timestamp nanoseconds */ } xfs_ictimestamp_t; /* @@ -388,18 +388,18 @@ typedef struct xfs_ictimestamp { * kept identical to struct xfs_dinode except for the endianness annotations. */ struct xfs_log_dinode { - __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ - __uint16_t di_mode; /* mode and type of file */ - __int8_t di_version; /* inode version */ - __int8_t di_format; /* format of di_c data */ - __uint8_t di_pad3[2]; /* unused in v2/3 inodes */ - __uint32_t di_uid; /* owner's user id */ - __uint32_t di_gid; /* owner's group id */ - __uint32_t di_nlink; /* number of links to file */ - __uint16_t di_projid_lo; /* lower part of owner's project id */ - __uint16_t di_projid_hi; /* higher part of owner's project id */ - __uint8_t di_pad[6]; /* unused, zeroed space */ - __uint16_t di_flushiter; /* incremented on flush */ + uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ + uint16_t di_mode; /* mode and type of file */ + int8_t di_version; /* inode version */ + int8_t di_format; /* format of di_c data */ + uint8_t di_pad3[2]; /* unused in v2/3 inodes */ + uint32_t di_uid; /* owner's user id */ + uint32_t di_gid; /* owner's group id */ + uint32_t di_nlink; /* number of links to file */ + uint16_t di_projid_lo; /* lower part of owner's project id */ + uint16_t di_projid_hi; /* higher part of owner's project id */ + uint8_t di_pad[6]; /* unused, zeroed space */ + uint16_t di_flushiter; /* incremented on flush */ xfs_ictimestamp_t di_atime; /* time last accessed */ xfs_ictimestamp_t di_mtime; /* time last modified */ xfs_ictimestamp_t di_ctime; /* time created/inode modified */ @@ -408,23 +408,23 @@ struct xfs_log_dinode { xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ xfs_extnum_t di_nextents; /* number of extents in data fork */ xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ - __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ - __int8_t di_aformat; /* format of attr fork's data */ - __uint32_t di_dmevmask; /* DMIG event mask */ - __uint16_t di_dmstate; /* DMIG state info */ - __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ - __uint32_t di_gen; /* generation number */ + uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ + int8_t di_aformat; /* format of attr fork's data */ + uint32_t di_dmevmask; /* DMIG event mask */ + uint16_t di_dmstate; /* DMIG state info */ + uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ + uint32_t di_gen; /* generation number */ /* di_next_unlinked is the only non-core field in the old dinode */ xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */ /* start of the extended dinode, writable fields */ - __uint32_t di_crc; /* CRC of the inode */ - __uint64_t di_changecount; /* number of attribute changes */ + uint32_t di_crc; /* CRC of the inode */ + uint64_t di_changecount; /* number of attribute changes */ xfs_lsn_t di_lsn; /* flush sequence */ - __uint64_t di_flags2; /* more random flags */ - __uint32_t di_cowextsize; /* basic cow extent size for file */ - __uint8_t di_pad2[12]; /* more padding for future expansion */ + uint64_t di_flags2; /* more random flags */ + uint32_t di_cowextsize; /* basic cow extent size for file */ + uint8_t di_pad2[12]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_ictimestamp_t di_crtime; /* time created */ @@ -483,7 +483,7 @@ typedef struct xfs_buf_log_format { unsigned short blf_size; /* size of this item */ unsigned short blf_flags; /* misc state */ unsigned short blf_len; /* number of blocks in this buf */ - __int64_t blf_blkno; /* starting blkno of this buf */ + int64_t blf_blkno; /* starting blkno of this buf */ unsigned int blf_map_size; /* used size of data bitmap in words */ unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */ } xfs_buf_log_format_t; @@ -533,7 +533,7 @@ xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type) blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK); } -static inline __uint16_t +static inline uint16_t xfs_blft_from_flags(struct xfs_buf_log_format *blf) { return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT; @@ -554,14 +554,14 @@ typedef struct xfs_extent { * conversion routine. */ typedef struct xfs_extent_32 { - __uint64_t ext_start; - __uint32_t ext_len; + uint64_t ext_start; + uint32_t ext_len; } __attribute__((packed)) xfs_extent_32_t; typedef struct xfs_extent_64 { - __uint64_t ext_start; - __uint32_t ext_len; - __uint32_t ext_pad; + uint64_t ext_start; + uint32_t ext_len; + uint32_t ext_pad; } xfs_extent_64_t; /* @@ -570,26 +570,26 @@ typedef struct xfs_extent_64 { * size is given by efi_nextents. */ typedef struct xfs_efi_log_format { - __uint16_t efi_type; /* efi log item type */ - __uint16_t efi_size; /* size of this item */ - __uint32_t efi_nextents; /* # extents to free */ - __uint64_t efi_id; /* efi identifier */ + uint16_t efi_type; /* efi log item type */ + uint16_t efi_size; /* size of this item */ + uint32_t efi_nextents; /* # extents to free */ + uint64_t efi_id; /* efi identifier */ xfs_extent_t efi_extents[1]; /* array of extents to free */ } xfs_efi_log_format_t; typedef struct xfs_efi_log_format_32 { - __uint16_t efi_type; /* efi log item type */ - __uint16_t efi_size; /* size of this item */ - __uint32_t efi_nextents; /* # extents to free */ - __uint64_t efi_id; /* efi identifier */ + uint16_t efi_type; /* efi log item type */ + uint16_t efi_size; /* size of this item */ + uint32_t efi_nextents; /* # extents to free */ + uint64_t efi_id; /* efi identifier */ xfs_extent_32_t efi_extents[1]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; typedef struct xfs_efi_log_format_64 { - __uint16_t efi_type; /* efi log item type */ - __uint16_t efi_size; /* size of this item */ - __uint32_t efi_nextents; /* # extents to free */ - __uint64_t efi_id; /* efi identifier */ + uint16_t efi_type; /* efi log item type */ + uint16_t efi_size; /* size of this item */ + uint32_t efi_nextents; /* # extents to free */ + uint64_t efi_id; /* efi identifier */ xfs_extent_64_t efi_extents[1]; /* array of extents to free */ } xfs_efi_log_format_64_t; @@ -599,26 +599,26 @@ typedef struct xfs_efi_log_format_64 { * size is given by efd_nextents; */ typedef struct xfs_efd_log_format { - __uint16_t efd_type; /* efd log item type */ - __uint16_t efd_size; /* size of this item */ - __uint32_t efd_nextents; /* # of extents freed */ - __uint64_t efd_efi_id; /* id of corresponding efi */ + uint16_t efd_type; /* efd log item type */ + uint16_t efd_size; /* size of this item */ + uint32_t efd_nextents; /* # of extents freed */ + uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_t efd_extents[1]; /* array of extents freed */ } xfs_efd_log_format_t; typedef struct xfs_efd_log_format_32 { - __uint16_t efd_type; /* efd log item type */ - __uint16_t efd_size; /* size of this item */ - __uint32_t efd_nextents; /* # of extents freed */ - __uint64_t efd_efi_id; /* id of corresponding efi */ + uint16_t efd_type; /* efd log item type */ + uint16_t efd_size; /* size of this item */ + uint32_t efd_nextents; /* # of extents freed */ + uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_32_t efd_extents[1]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; typedef struct xfs_efd_log_format_64 { - __uint16_t efd_type; /* efd log item type */ - __uint16_t efd_size; /* size of this item */ - __uint32_t efd_nextents; /* # of extents freed */ - __uint64_t efd_efi_id; /* id of corresponding efi */ + uint16_t efd_type; /* efd log item type */ + uint16_t efd_size; /* size of this item */ + uint32_t efd_nextents; /* # of extents freed */ + uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_64_t efd_extents[1]; /* array of extents freed */ } xfs_efd_log_format_64_t; @@ -626,11 +626,11 @@ typedef struct xfs_efd_log_format_64 { * RUI/RUD (reverse mapping) log format definitions */ struct xfs_map_extent { - __uint64_t me_owner; - __uint64_t me_startblock; - __uint64_t me_startoff; - __uint32_t me_len; - __uint32_t me_flags; + uint64_t me_owner; + uint64_t me_startblock; + uint64_t me_startoff; + uint32_t me_len; + uint32_t me_flags; }; /* rmap me_flags: upper bits are flags, lower byte is type code */ @@ -659,10 +659,10 @@ struct xfs_map_extent { * size is given by rui_nextents. */ struct xfs_rui_log_format { - __uint16_t rui_type; /* rui log item type */ - __uint16_t rui_size; /* size of this item */ - __uint32_t rui_nextents; /* # extents to free */ - __uint64_t rui_id; /* rui identifier */ + uint16_t rui_type; /* rui log item type */ + uint16_t rui_size; /* size of this item */ + uint32_t rui_nextents; /* # extents to free */ + uint64_t rui_id; /* rui identifier */ struct xfs_map_extent rui_extents[]; /* array of extents to rmap */ }; @@ -680,19 +680,19 @@ xfs_rui_log_format_sizeof( * size is given by rud_nextents; */ struct xfs_rud_log_format { - __uint16_t rud_type; /* rud log item type */ - __uint16_t rud_size; /* size of this item */ - __uint32_t __pad; - __uint64_t rud_rui_id; /* id of corresponding rui */ + uint16_t rud_type; /* rud log item type */ + uint16_t rud_size; /* size of this item */ + uint32_t __pad; + uint64_t rud_rui_id; /* id of corresponding rui */ }; /* * CUI/CUD (refcount update) log format definitions */ struct xfs_phys_extent { - __uint64_t pe_startblock; - __uint32_t pe_len; - __uint32_t pe_flags; + uint64_t pe_startblock; + uint32_t pe_len; + uint32_t pe_flags; }; /* refcount pe_flags: upper bits are flags, lower byte is type code */ @@ -707,10 +707,10 @@ struct xfs_phys_extent { * size is given by cui_nextents. */ struct xfs_cui_log_format { - __uint16_t cui_type; /* cui log item type */ - __uint16_t cui_size; /* size of this item */ - __uint32_t cui_nextents; /* # extents to free */ - __uint64_t cui_id; /* cui identifier */ + uint16_t cui_type; /* cui log item type */ + uint16_t cui_size; /* size of this item */ + uint32_t cui_nextents; /* # extents to free */ + uint64_t cui_id; /* cui identifier */ struct xfs_phys_extent cui_extents[]; /* array of extents */ }; @@ -728,10 +728,10 @@ xfs_cui_log_format_sizeof( * size is given by cud_nextents; */ struct xfs_cud_log_format { - __uint16_t cud_type; /* cud log item type */ - __uint16_t cud_size; /* size of this item */ - __uint32_t __pad; - __uint64_t cud_cui_id; /* id of corresponding cui */ + uint16_t cud_type; /* cud log item type */ + uint16_t cud_size; /* size of this item */ + uint32_t __pad; + uint64_t cud_cui_id; /* id of corresponding cui */ }; /* @@ -755,10 +755,10 @@ struct xfs_cud_log_format { * size is given by bui_nextents. */ struct xfs_bui_log_format { - __uint16_t bui_type; /* bui log item type */ - __uint16_t bui_size; /* size of this item */ - __uint32_t bui_nextents; /* # extents to free */ - __uint64_t bui_id; /* bui identifier */ + uint16_t bui_type; /* bui log item type */ + uint16_t bui_size; /* size of this item */ + uint32_t bui_nextents; /* # extents to free */ + uint64_t bui_id; /* bui identifier */ struct xfs_map_extent bui_extents[]; /* array of extents to bmap */ }; @@ -776,10 +776,10 @@ xfs_bui_log_format_sizeof( * size is given by bud_nextents; */ struct xfs_bud_log_format { - __uint16_t bud_type; /* bud log item type */ - __uint16_t bud_size; /* size of this item */ - __uint32_t __pad; - __uint64_t bud_bui_id; /* id of corresponding bui */ + uint16_t bud_type; /* bud log item type */ + uint16_t bud_size; /* size of this item */ + uint32_t __pad; + uint64_t bud_bui_id; /* id of corresponding bui */ }; /* @@ -789,12 +789,12 @@ struct xfs_bud_log_format { * 32 bits : log_recovery code assumes that. */ typedef struct xfs_dq_logformat { - __uint16_t qlf_type; /* dquot log item type */ - __uint16_t qlf_size; /* size of this item */ + uint16_t qlf_type; /* dquot log item type */ + uint16_t qlf_size; /* size of this item */ xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ - __int64_t qlf_blkno; /* blkno of dquot buffer */ - __int32_t qlf_len; /* len of dquot buffer */ - __uint32_t qlf_boffset; /* off of dquot in buffer */ + int64_t qlf_blkno; /* blkno of dquot buffer */ + int32_t qlf_len; /* len of dquot buffer */ + uint32_t qlf_boffset; /* off of dquot in buffer */ } xfs_dq_logformat_t; /* @@ -853,8 +853,8 @@ typedef struct xfs_qoff_logformat { * decoding can be done correctly. */ struct xfs_icreate_log { - __uint16_t icl_type; /* type of log format structure */ - __uint16_t icl_size; /* size of log format structure */ + uint16_t icl_type; /* type of log format structure */ + uint16_t icl_size; /* size of log format structure */ __be32 icl_ag; /* ag being allocated in */ __be32 icl_agbno; /* start block of inode range */ __be32 icl_count; /* number of inodes to initialise */ diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index d9f65e2d5cc8..66948a9fd486 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -26,7 +26,7 @@ #define XLOG_RHASH_SIZE 16 #define XLOG_RHASH_SHIFT 2 #define XLOG_RHASH(tid) \ - ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) + ((((uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1) @@ -42,7 +42,6 @@ typedef struct xlog_recover_item { xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ } xlog_recover_item_t; -struct xlog_tid; typedef struct xlog_recover { struct hlist_node r_list; xlog_tid_t r_log_tid; /* log's transaction id */ diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 8eed51275bb3..d69c772271cb 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -27,8 +27,8 @@ * they may need 64-bit accounting. Hence, 64-bit quota-counters, * and quota-limits. This is a waste in the common case, but hey ... */ -typedef __uint64_t xfs_qcnt_t; -typedef __uint16_t xfs_qwarncnt_t; +typedef uint64_t xfs_qcnt_t; +typedef uint16_t xfs_qwarncnt_t; /* * flags for q_flags field in the dquot. diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index b177ef33cd4c..900ea231f9a3 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -784,14 +784,6 @@ xfs_refcount_merge_extents( } /* - * While we're adjusting the refcounts records of an extent, we have - * to keep an eye on the number of extents we're dirtying -- run too - * many in a single transaction and we'll exceed the transaction's - * reservation and crash the fs. Each record adds 12 bytes to the - * log (plus any key updates) so we'll conservatively assume 24 bytes - * per record. We must also leave space for btree splits on both ends - * of the range and space for the CUD and a new CUI. - * * XXX: This is a pretty hand-wavy estimate. The penalty for guessing * true incorrectly is a shutdown FS; the penalty for guessing false * incorrectly is more transaction rolls than might be necessary. @@ -813,8 +805,7 @@ xfs_refcount_still_have_space( */ if (cur->bc_private.a.priv.refc.nr_ops > 2 && XFS_TEST_ERROR(false, cur->bc_mp, - XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE, - XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE)) + XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) return false; if (cur->bc_private.a.priv.refc.nr_ops == 0) @@ -822,7 +813,7 @@ xfs_refcount_still_have_space( else if (overhead > cur->bc_tp->t_log_res) return false; return cur->bc_tp->t_log_res - overhead > - cur->bc_private.a.priv.refc.nr_ops * 32; + cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; } /* @@ -1076,8 +1067,7 @@ xfs_refcount_finish_one( blockcount); if (XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_REFCOUNT_FINISH_ONE, - XFS_RANDOM_REFCOUNT_FINISH_ONE)) + XFS_ERRTAG_REFCOUNT_FINISH_ONE)) return -EIO; /* @@ -1629,13 +1619,28 @@ xfs_refcount_recover_cow_leftovers( if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) return -EOPNOTSUPP; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + INIT_LIST_HEAD(&debris); + + /* + * In this first part, we use an empty transaction to gather up + * all the leftover CoW extents so that we can subsequently + * delete them. The empty transaction is used to avoid + * a buffer lock deadlock if there happens to be a loop in the + * refcountbt because we're allowed to re-grab a buffer that is + * already attached to our transaction. When we're done + * recording the CoW debris we cancel the (empty) transaction + * and everything goes away cleanly. + */ + error = xfs_trans_alloc_empty(mp, &tp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (error) + goto out_trans; + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ - INIT_LIST_HEAD(&debris); memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); low.rc.rc_startblock = XFS_REFC_COW_START; @@ -1645,10 +1650,11 @@ xfs_refcount_recover_cow_leftovers( if (error) goto out_cursor; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); + xfs_trans_cancel(tp); /* Now iterate the list to free the leftovers */ - list_for_each_entry(rr, &debris, rr_list) { + list_for_each_entry_safe(rr, n, &debris, rr_list) { /* Set up transaction. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) @@ -1676,8 +1682,16 @@ xfs_refcount_recover_cow_leftovers( error = xfs_trans_commit(tp); if (error) goto out_free; + + list_del(&rr->rr_list); + kmem_free(rr); } + return error; +out_defer: + xfs_defer_cancel(&dfops); +out_trans: + xfs_trans_cancel(tp); out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { @@ -1688,11 +1702,6 @@ out_free: out_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_buf_relse(agbp); - goto out_free; - -out_defer: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_free; + xfs_trans_brelse(tp, agbp); + goto out_trans; } diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 098dc668ab2c..eafb9d1f3b37 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp, extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, xfs_agnumber_t agno); +/* + * While we're adjusting the refcounts records of an extent, we have + * to keep an eye on the number of extents we're dirtying -- run too + * many in a single transaction and we'll exceed the transaction's + * reservation and crash the fs. Each record adds 12 bytes to the + * log (plus any key updates) so we'll conservatively assume 32 bytes + * per record. We must also leave space for btree splits on both ends + * of the range and space for the CUD and a new CUI. + */ +#define XFS_REFCOUNT_ITEM_OVERHEAD 32 + +static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) +{ + return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; +} + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 50add5272807..3c59dd3d58d7 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -202,7 +202,7 @@ xfs_refcountbt_init_ptr_from_cur( ptr->s = agf->agf_refcount_root; } -STATIC __int64_t +STATIC int64_t xfs_refcountbt_key_diff( struct xfs_btree_cur *cur, union xfs_btree_key *key) @@ -210,16 +210,16 @@ xfs_refcountbt_key_diff( struct xfs_refcount_irec *rec = &cur->bc_rec.rc; struct xfs_refcount_key *kp = &key->refc; - return (__int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; + return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; } -STATIC __int64_t +STATIC int64_t xfs_refcountbt_diff_two_keys( struct xfs_btree_cur *cur, union xfs_btree_key *k1, union xfs_btree_key *k2) { - return (__int64_t)be32_to_cpu(k1->refc.rc_startblock) - + return (int64_t)be32_to_cpu(k1->refc.rc_startblock) - be32_to_cpu(k2->refc.rc_startblock); } @@ -285,7 +285,6 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = { .verify_write = xfs_refcountbt_write_verify, }; -#if defined(DEBUG) || defined(XFS_WARN) STATIC int xfs_refcountbt_keys_inorder( struct xfs_btree_cur *cur, @@ -306,7 +305,6 @@ xfs_refcountbt_recs_inorder( be32_to_cpu(r1->refc.rc_blockcount) <= be32_to_cpu(r2->refc.rc_startblock); } -#endif static const struct xfs_btree_ops xfs_refcountbt_ops = { .rec_len = sizeof(struct xfs_refcount_rec), @@ -325,10 +323,8 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = { .key_diff = xfs_refcountbt_key_diff, .buf_ops = &xfs_refcountbt_buf_ops, .diff_two_keys = xfs_refcountbt_diff_two_keys, -#if defined(DEBUG) || defined(XFS_WARN) .keys_inorder = xfs_refcountbt_keys_inorder, .recs_inorder = xfs_refcountbt_recs_inorder, -#endif }; /* diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 3a8cc7139912..55c88a732690 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -179,7 +179,8 @@ done: return error; } -static int +/* Convert an internal btree record to an rmap record. */ +int xfs_rmap_btrec_to_irec( union xfs_btree_rec *rec, struct xfs_rmap_irec *irec) @@ -2001,14 +2002,14 @@ xfs_rmap_query_range_helper( /* Find all rmaps between two keys. */ int xfs_rmap_query_range( - struct xfs_btree_cur *cur, - struct xfs_rmap_irec *low_rec, - struct xfs_rmap_irec *high_rec, - xfs_rmap_query_range_fn fn, - void *priv) + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *low_rec, + struct xfs_rmap_irec *high_rec, + xfs_rmap_query_range_fn fn, + void *priv) { - union xfs_btree_irec low_brec; - union xfs_btree_irec high_brec; + union xfs_btree_irec low_brec; + union xfs_btree_irec high_brec; struct xfs_rmap_query_range_info query; low_brec.r = *low_rec; @@ -2019,6 +2020,20 @@ xfs_rmap_query_range( xfs_rmap_query_range_helper, &query); } +/* Find all rmaps. */ +int +xfs_rmap_query_all( + struct xfs_btree_cur *cur, + xfs_rmap_query_range_fn fn, + void *priv) +{ + struct xfs_rmap_query_range_info query; + + query.priv = priv; + query.fn = fn; + return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query); +} + /* Clean up after calling xfs_rmap_finish_one. */ void xfs_rmap_finish_one_cleanup( @@ -2047,7 +2062,7 @@ int xfs_rmap_finish_one( struct xfs_trans *tp, enum xfs_rmap_intent_type type, - __uint64_t owner, + uint64_t owner, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, @@ -2072,8 +2087,7 @@ xfs_rmap_finish_one( startoff, blockcount, state); if (XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_RMAP_FINISH_ONE, - XFS_RANDOM_RMAP_FINISH_ONE)) + XFS_ERRTAG_RMAP_FINISH_ONE)) return -EIO; /* @@ -2168,7 +2182,7 @@ __xfs_rmap_add( struct xfs_mount *mp, struct xfs_defer_ops *dfops, enum xfs_rmap_intent_type type, - __uint64_t owner, + uint64_t owner, int whichfork, struct xfs_bmbt_irec *bmap) { @@ -2252,7 +2266,7 @@ xfs_rmap_alloc_extent( xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, - __uint64_t owner) + uint64_t owner) { struct xfs_bmbt_irec bmap; @@ -2276,7 +2290,7 @@ xfs_rmap_free_extent( xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, - __uint64_t owner) + uint64_t owner) { struct xfs_bmbt_irec bmap; @@ -2291,3 +2305,31 @@ xfs_rmap_free_extent( return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); } + +/* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */ +int +xfs_rmap_compare( + const struct xfs_rmap_irec *a, + const struct xfs_rmap_irec *b) +{ + __u64 oa; + __u64 ob; + + oa = xfs_rmap_irec_offset_pack(a); + ob = xfs_rmap_irec_offset_pack(b); + + if (a->rm_startblock < b->rm_startblock) + return -1; + else if (a->rm_startblock > b->rm_startblock) + return 1; + else if (a->rm_owner < b->rm_owner) + return -1; + else if (a->rm_owner > b->rm_owner) + return 1; + else if (oa < ob) + return -1; + else if (oa > ob) + return 1; + else + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 789930599339..466ede637080 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -162,6 +162,8 @@ typedef int (*xfs_rmap_query_range_fn)( int xfs_rmap_query_range(struct xfs_btree_cur *cur, struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec, xfs_rmap_query_range_fn fn, void *priv); +int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn, + void *priv); enum xfs_rmap_intent_type { XFS_RMAP_MAP, @@ -177,7 +179,7 @@ enum xfs_rmap_intent_type { struct xfs_rmap_intent { struct list_head ri_list; enum xfs_rmap_intent_type ri_type; - __uint64_t ri_owner; + uint64_t ri_owner; int ri_whichfork; struct xfs_bmbt_irec ri_bmap; }; @@ -194,15 +196,15 @@ int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, struct xfs_bmbt_irec *imap); int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, - __uint64_t owner); + uint64_t owner); int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, - __uint64_t owner); + uint64_t owner); void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type, - __uint64_t owner, int whichfork, xfs_fileoff_t startoff, + uint64_t owner, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, xfs_filblks_t blockcount, xfs_exntst_t state, struct xfs_btree_cur **pcur); @@ -212,5 +214,10 @@ int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno, int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, struct xfs_rmap_irec *irec, int *stat); +int xfs_rmap_compare(const struct xfs_rmap_irec *a, + const struct xfs_rmap_irec *b); +union xfs_btree_rec; +int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, + struct xfs_rmap_irec *irec); #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 74e5a54bc428..9d9c9192584c 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -199,7 +199,7 @@ xfs_rmapbt_init_high_key_from_rec( union xfs_btree_key *key, union xfs_btree_rec *rec) { - __uint64_t off; + uint64_t off; int adj; adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1; @@ -241,7 +241,7 @@ xfs_rmapbt_init_ptr_from_cur( ptr->s = agf->agf_roots[cur->bc_btnum]; } -STATIC __int64_t +STATIC int64_t xfs_rmapbt_key_diff( struct xfs_btree_cur *cur, union xfs_btree_key *key) @@ -249,9 +249,9 @@ xfs_rmapbt_key_diff( struct xfs_rmap_irec *rec = &cur->bc_rec.r; struct xfs_rmap_key *kp = &key->rmap; __u64 x, y; - __int64_t d; + int64_t d; - d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; + d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; if (d) return d; @@ -271,7 +271,7 @@ xfs_rmapbt_key_diff( return 0; } -STATIC __int64_t +STATIC int64_t xfs_rmapbt_diff_two_keys( struct xfs_btree_cur *cur, union xfs_btree_key *k1, @@ -279,10 +279,10 @@ xfs_rmapbt_diff_two_keys( { struct xfs_rmap_key *kp1 = &k1->rmap; struct xfs_rmap_key *kp2 = &k2->rmap; - __int64_t d; + int64_t d; __u64 x, y; - d = (__int64_t)be32_to_cpu(kp1->rm_startblock) - + d = (int64_t)be32_to_cpu(kp1->rm_startblock) - be32_to_cpu(kp2->rm_startblock); if (d) return d; @@ -377,17 +377,16 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = { .verify_write = xfs_rmapbt_write_verify, }; -#if defined(DEBUG) || defined(XFS_WARN) STATIC int xfs_rmapbt_keys_inorder( struct xfs_btree_cur *cur, union xfs_btree_key *k1, union xfs_btree_key *k2) { - __uint32_t x; - __uint32_t y; - __uint64_t a; - __uint64_t b; + uint32_t x; + uint32_t y; + uint64_t a; + uint64_t b; x = be32_to_cpu(k1->rmap.rm_startblock); y = be32_to_cpu(k2->rmap.rm_startblock); @@ -414,10 +413,10 @@ xfs_rmapbt_recs_inorder( union xfs_btree_rec *r1, union xfs_btree_rec *r2) { - __uint32_t x; - __uint32_t y; - __uint64_t a; - __uint64_t b; + uint32_t x; + uint32_t y; + uint64_t a; + uint64_t b; x = be32_to_cpu(r1->rmap.rm_startblock); y = be32_to_cpu(r2->rmap.rm_startblock); @@ -437,7 +436,6 @@ xfs_rmapbt_recs_inorder( return 1; return 0; } -#endif /* DEBUG */ static const struct xfs_btree_ops xfs_rmapbt_ops = { .rec_len = sizeof(struct xfs_rmap_rec), @@ -456,10 +454,8 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = { .key_diff = xfs_rmapbt_key_diff, .buf_ops = &xfs_rmapbt_buf_ops, .diff_two_keys = xfs_rmapbt_diff_two_keys, -#if defined(DEBUG) || defined(XFS_WARN) .keys_inorder = xfs_rmapbt_keys_inorder, .recs_inorder = xfs_rmapbt_recs_inorder, -#endif }; /* diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index ea45584a9913..5d4e43ef4eea 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -70,7 +70,7 @@ const struct xfs_buf_ops xfs_rtbuf_ops = { * Get a buffer for the bitmap or summary file block specified. * The buffer is returned read and locked. */ -static int +int xfs_rtbuf_get( xfs_mount_t *mp, /* file system mount structure */ xfs_trans_t *tp, /* transaction pointer */ @@ -1011,8 +1011,78 @@ xfs_rtfree_extent( mp->m_sb.sb_rextents) { if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; - *(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0; + *(uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0; xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); } return 0; } + +/* Find all the free records within a given range. */ +int +xfs_rtalloc_query_range( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *low_rec, + struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, + void *priv) +{ + struct xfs_rtalloc_rec rec; + struct xfs_mount *mp = tp->t_mountp; + xfs_rtblock_t rtstart; + xfs_rtblock_t rtend; + xfs_rtblock_t rem; + int is_free; + int error = 0; + + if (low_rec->ar_startblock > high_rec->ar_startblock) + return -EINVAL; + else if (low_rec->ar_startblock == high_rec->ar_startblock) + return 0; + + /* Iterate the bitmap, looking for discrepancies. */ + rtstart = low_rec->ar_startblock; + rem = high_rec->ar_startblock - rtstart; + while (rem) { + /* Is the first block free? */ + error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend, + &is_free); + if (error) + break; + + /* How long does the extent go for? */ + error = xfs_rtfind_forw(mp, tp, rtstart, + high_rec->ar_startblock - 1, &rtend); + if (error) + break; + + if (is_free) { + rec.ar_startblock = rtstart; + rec.ar_blockcount = rtend - rtstart + 1; + + error = fn(tp, &rec, priv); + if (error) + break; + } + + rem -= rtend - rtstart + 1; + rtstart = rtend + 1; + } + + return error; +} + +/* Find all the free records. */ +int +xfs_rtalloc_query_all( + struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, + void *priv) +{ + struct xfs_rtalloc_rec keys[2]; + + keys[0].ar_startblock = 0; + keys[1].ar_startblock = tp->t_mountp->m_sb.sb_rblocks; + keys[0].ar_blockcount = keys[1].ar_blockcount = 0; + + return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv); +} diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 584ec896a533..9b5aae2bcc0b 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -448,7 +448,7 @@ xfs_sb_quota_to_disk( struct xfs_dsb *to, struct xfs_sb *from) { - __uint16_t qflags = from->sb_qflags; + uint16_t qflags = from->sb_qflags; to->sb_uquotino = cpu_to_be64(from->sb_uquotino); if (xfs_sb_version_has_pquotino(from)) { @@ -756,7 +756,7 @@ xfs_sb_mount_common( mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; mp->m_bsize = XFS_FSB_TO_BB(mp, 1); - mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, + mp->m_ialloc_inos = (int)MAX((uint16_t)XFS_INODES_PER_CHUNK, sbp->sb_inopblock); mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 2e2c6716b623..c484877129a0 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -114,7 +114,7 @@ xfs_symlink_verify( if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) return false; if (be32_to_cpu(dsl->sl_offset) + - be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN) + be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN) return false; if (dsl->sl_owner == 0) return false; diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index b456cca1bfb2..6bd916bd35e2 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -477,14 +477,14 @@ xfs_calc_mkdir_reservation( /* * Making a new symplink is the same as creating a new file, but * with the added blocks for remote symlink data which can be up to 1kB in - * length (MAXPATHLEN). + * length (XFS_SYMLINK_MAXLEN). */ STATIC uint xfs_calc_symlink_reservation( struct xfs_mount *mp) { return xfs_calc_create_reservation(mp) + - xfs_calc_buf_res(1, MAXPATHLEN); + xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); } /* diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 7917f6e44286..d787c677d2a3 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -21,8 +21,20 @@ /* * Components of space reservations. */ + +/* Worst case number of rmaps that can be held in a block. */ #define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \ (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0])) + +/* Adding one rmap could split every level up to the top of the tree. */ +#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels) + +/* Blocks we might need to add "b" rmaps to a tree. */ +#define XFS_NRMAPADD_SPACE_RES(mp, b)\ + (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \ + XFS_RMAPADD_SPACE_RES(mp)) + #define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \ (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0])) #define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1) @@ -30,13 +42,12 @@ (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ XFS_EXTENTADD_SPACE_RES(mp,w)) + +/* Blocks we might need to add "b" mappings & rmappings to a file. */ #define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\ - (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ - XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ - XFS_EXTENTADD_SPACE_RES(mp,w) + \ - ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ - XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \ - (mp)->m_rmap_maxlevels) + (XFS_NEXTENTADD_SPACE_RES((mp), (b), (w)) + \ + XFS_NRMAPADD_SPACE_RES((mp), (b))) + #define XFS_DAENTER_1B(mp,w) \ ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1) #define XFS_DAENTER_DBS(mp,w) \ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 717909f2f7b7..0220159bd463 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -18,34 +18,34 @@ #ifndef __XFS_TYPES_H__ #define __XFS_TYPES_H__ -typedef __uint32_t prid_t; /* project ID */ +typedef uint32_t prid_t; /* project ID */ -typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */ -typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */ -typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ -typedef __uint32_t xfs_agnumber_t; /* allocation group number */ -typedef __int32_t xfs_extnum_t; /* # of extents in a file */ -typedef __int16_t xfs_aextnum_t; /* # extents in an attribute fork */ -typedef __int64_t xfs_fsize_t; /* bytes in a file */ -typedef __uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ +typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ +typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ +typedef uint32_t xfs_extlen_t; /* extent length in blocks */ +typedef uint32_t xfs_agnumber_t; /* allocation group number */ +typedef int32_t xfs_extnum_t; /* # of extents in a file */ +typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */ +typedef int64_t xfs_fsize_t; /* bytes in a file */ +typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ -typedef __int32_t xfs_suminfo_t; /* type of bitmap summary info */ -typedef __int32_t xfs_rtword_t; /* word type for bitmap manipulations */ +typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */ +typedef int32_t xfs_rtword_t; /* word type for bitmap manipulations */ -typedef __int64_t xfs_lsn_t; /* log sequence number */ -typedef __int32_t xfs_tid_t; /* transaction identifier */ +typedef int64_t xfs_lsn_t; /* log sequence number */ +typedef int32_t xfs_tid_t; /* transaction identifier */ -typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ -typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ +typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ +typedef uint32_t xfs_dahash_t; /* dir/attr hash value */ -typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ -typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ -typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ -typedef __uint64_t xfs_fileoff_t; /* block number in a file */ -typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ +typedef uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ +typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ +typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ +typedef uint64_t xfs_fileoff_t; /* block number in a file */ +typedef uint64_t xfs_filblks_t; /* number of blocks in a file */ -typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ -typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ +typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ +typedef int64_t xfs_sfiloff_t; /* signed block number in a file */ /* * Null values for the types. @@ -125,7 +125,7 @@ struct xfs_name { * uid_t and gid_t are hard-coded to 32 bits in the inode. * Hence, an 'id' in a dquot is 32 bits.. */ -typedef __uint32_t xfs_dqid_t; +typedef uint32_t xfs_dqid_t; /* * Constants for bit manipulations. diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c deleted file mode 100644 index b83f76b6d410..000000000000 --- a/fs/xfs/uuid.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include <xfs.h> - -/* IRIX interpretation of an uuid_t */ -typedef struct { - __be32 uu_timelow; - __be16 uu_timemid; - __be16 uu_timehi; - __be16 uu_clockseq; - __be16 uu_node[3]; -} xfs_uu_t; - -/* - * uuid_getnodeuniq - obtain the node unique fields of a UUID. - * - * This is not in any way a standard or condoned UUID function; - * it just something that's needed for user-level file handles. - */ -void -uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) -{ - xfs_uu_t *uup = (xfs_uu_t *)uuid; - - fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | - be16_to_cpu(uup->uu_timemid); - fsid[1] = be32_to_cpu(uup->uu_timelow); -} - -int -uuid_is_nil(uuid_t *uuid) -{ - int i; - char *cp = (char *)uuid; - - if (uuid == NULL) - return 0; - /* implied check of version number here... */ - for (i = 0; i < sizeof *uuid; i++) - if (*cp++) return 0; /* not nil */ - return 1; /* is nil */ -} - -int -uuid_equal(uuid_t *uuid1, uuid_t *uuid2) -{ - return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; -} diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h deleted file mode 100644 index 104db0f3bed6..000000000000 --- a/fs/xfs/uuid.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_UUID_H__ -#define __XFS_SUPPORT_UUID_H__ - -typedef struct { - unsigned char __u_bits[16]; -} uuid_t; - -extern int uuid_is_nil(uuid_t *uuid); -extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); -extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); - -static inline void -uuid_copy(uuid_t *dst, uuid_t *src) -{ - memcpy(dst, src, sizeof(uuid_t)); -} - -#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index a742c47f7d5a..80cd0fd86783 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -24,6 +24,10 @@ #define XFS_BUF_LOCK_TRACKING 1 #endif +#ifdef CONFIG_XFS_ASSERT_FATAL +#define XFS_ASSERT_FATAL 1 +#endif + #ifdef CONFIG_XFS_WARN #define XFS_WARN 1 #endif diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b468e041f207..7034e17535de 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -170,8 +170,8 @@ xfs_get_acl(struct inode *inode, int type) return acl; } -STATIC int -__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +int +__xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct xfs_inode *ip = XFS_I(inode); unsigned char *ea_name; @@ -268,5 +268,5 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } set_acl: - return __xfs_set_acl(inode, type, acl); + return __xfs_set_acl(inode, acl, type); } diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 286fa89217f5..04327318ef67 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -24,6 +24,7 @@ struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); #else static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) { diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 631e7c0e0a29..6bf120bb1a17 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -103,19 +103,19 @@ xfs_finish_page_writeback( unsigned int bsize; ASSERT(bvec->bv_offset < PAGE_SIZE); - ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0); + ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); ASSERT(end < PAGE_SIZE); - ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0); + ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); bh = head = page_buffers(bvec->bv_page); bsize = bh->b_size; do { + if (off > end) + break; next = bh->b_this_page; if (off < bvec->bv_offset) goto next_bh; - if (off > end) - break; bh->b_end_io(bh, !error); next_bh: off += bsize; @@ -189,7 +189,7 @@ xfs_setfilesize_trans_alloc( * We hand off the transaction to the completion thread now, so * clear the flag here. */ - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); return 0; } @@ -252,7 +252,7 @@ xfs_setfilesize_ioend( * thus we need to mark ourselves as being in a transaction manually. * Similarly for freeze protection. */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); /* we abort the update if there was an IO error */ @@ -274,54 +274,50 @@ xfs_end_io( struct xfs_ioend *ioend = container_of(work, struct xfs_ioend, io_work); struct xfs_inode *ip = XFS_I(ioend->io_inode); - int error = ioend->io_bio->bi_error; + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; + int error; /* - * Set an error if the mount has shut down and proceed with end I/O - * processing so it can perform whatever cleanups are necessary. + * Just clean up the in-memory strutures if the fs has been shut down. */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = -EIO; + goto done; + } /* - * For a CoW extent, we need to move the mapping from the CoW fork - * to the data fork. If instead an error happened, just dump the - * new blocks. + * Clean up any COW blocks on an I/O error. */ - if (ioend->io_type == XFS_IO_COW) { - if (error) - goto done; - if (ioend->io_bio->bi_error) { - error = xfs_reflink_cancel_cow_range(ip, - ioend->io_offset, ioend->io_size); - goto done; + error = blk_status_to_errno(ioend->io_bio->bi_status); + if (unlikely(error)) { + switch (ioend->io_type) { + case XFS_IO_COW: + xfs_reflink_cancel_cow_range(ip, offset, size, true); + break; } - error = xfs_reflink_end_cow(ip, ioend->io_offset, - ioend->io_size); - if (error) - goto done; + + goto done; } /* - * For unwritten extents we need to issue transactions to convert a - * range to normal written extens after the data I/O has finished. - * Detecting and handling completion IO errors is done individually - * for each case as different cleanup operations need to be performed - * on error. + * Success: commit the COW or unwritten blocks if needed. */ - if (ioend->io_type == XFS_IO_UNWRITTEN) { - if (error) - goto done; - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, - ioend->io_size); - } else if (ioend->io_append_trans) { - error = xfs_setfilesize_ioend(ioend, error); - } else { - ASSERT(!xfs_ioend_is_append(ioend) || - ioend->io_type == XFS_IO_COW); + switch (ioend->io_type) { + case XFS_IO_COW: + error = xfs_reflink_end_cow(ip, offset, size); + break; + case XFS_IO_UNWRITTEN: + error = xfs_iomap_write_unwritten(ip, offset, size); + break; + default: + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); + break; } done: + if (ioend->io_append_trans) + error = xfs_setfilesize_ioend(ioend, error); xfs_destroy_ioend(ioend, error); } @@ -337,7 +333,7 @@ xfs_end_bio( else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); else - xfs_destroy_ioend(ioend, bio->bi_error); + xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status)); } STATIC int @@ -349,7 +345,7 @@ xfs_map_blocks( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - ssize_t count = 1 << inode->i_blkbits; + ssize_t count = i_blocksize(inode); xfs_fileoff_t offset_fsb, end_fsb; int error = 0; int bmapi_flags = XFS_BMAPI_ENTIRE; @@ -481,6 +477,12 @@ xfs_submit_ioend( struct xfs_ioend *ioend, int status) { + /* Convert CoW extents to regular */ + if (!status && ioend->io_type == XFS_IO_COW) { + status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), + ioend->io_offset, ioend->io_size); + } + /* Reserve log space if we might write beyond the on-disk inode size. */ if (!status && ioend->io_type != XFS_IO_UNWRITTEN && @@ -499,11 +501,12 @@ xfs_submit_ioend( * time. */ if (status) { - ioend->io_bio->bi_error = status; + ioend->io_bio->bi_status = errno_to_blk_status(status); bio_endio(ioend->io_bio); return status; } + ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; submit_bio(ioend->io_bio); return 0; } @@ -563,6 +566,7 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); + ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; submit_bio(ioend->io_bio); ioend->io_bio = new; } @@ -752,7 +756,7 @@ xfs_aops_discard_page( break; } next_buffer: - offset += 1 << inode->i_blkbits; + offset += i_blocksize(inode); } while ((bh = bh->b_this_page) != head); @@ -835,12 +839,12 @@ xfs_writepage_map( struct inode *inode, struct page *page, loff_t offset, - __uint64_t end_offset) + uint64_t end_offset) { LIST_HEAD(submit_list); struct xfs_ioend *ioend, *next; struct buffer_head *bh, *head; - ssize_t len = 1 << inode->i_blkbits; + ssize_t len = i_blocksize(inode); int error = 0; int count = 0; int uptodate = 1; @@ -990,7 +994,7 @@ xfs_do_writepage( struct xfs_writepage_ctx *wpc = data; struct inode *inode = page->mapping->host; loff_t offset; - __uint64_t end_offset; + uint64_t end_offset; pgoff_t end_index; trace_xfs_writepage(inode, page, 0, 0); @@ -1015,7 +1019,7 @@ xfs_do_writepage( * Given that we do not allow direct reclaim to call us, we should * never be called while in a filesystem transaction. */ - if (WARN_ON_ONCE(current->flags & PF_FSTRANS)) + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) goto redirty; /* @@ -1204,7 +1208,7 @@ xfs_map_trim_size( offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, - 1 << inode->i_blkbits); + i_blocksize(inode)); } if (mapping_size > LONG_MAX) mapping_size = LONG_MAX; @@ -1235,7 +1239,7 @@ xfs_get_blocks( return -EIO; offset = (xfs_off_t)iblock << inode->i_blkbits; - ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); + ASSERT(bh_result->b_size >= i_blocksize(inode)); size = bh_result->b_size; if (offset >= i_size_read(inode)) @@ -1260,8 +1264,8 @@ xfs_get_blocks( if (nimaps) { trace_xfs_get_blocks_found(ip, offset, size, - ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN - : XFS_IO_OVERWRITE, &imap); + imap.br_state == XFS_EXT_UNWRITTEN ? + XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap); xfs_iunlock(ip, lockmode); } else { trace_xfs_get_blocks_notfound(ip, offset, size); @@ -1275,9 +1279,7 @@ xfs_get_blocks( * For unwritten extents do not report a disk address in the buffered * read case (treat as if we're reading into a hole). */ - if (imap.br_startblock != HOLESTARTBLOCK && - imap.br_startblock != DELAYSTARTBLOCK && - !ISUNWRITTEN(&imap)) + if (xfs_bmap_is_real_extent(&imap)) xfs_map_buffer(inode, bh_result, &imap, offset); /* @@ -1317,9 +1319,12 @@ xfs_vm_bmap( * The swap code (ab-)uses ->bmap to get a block mapping and then * bypasseѕ the file system for actual I/O. We really can't allow * that on reflinks inodes, so we have to skip out here. And yes, - * 0 is the magic code for a bmap error.. + * 0 is the magic code for a bmap error. + * + * Since we don't pass back blockdev info, we can't return bmap + * information for rt files either. */ - if (xfs_is_reflink_inode(ip)) + if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip)) return 0; filemap_write_and_wait(mapping); @@ -1383,7 +1388,7 @@ xfs_vm_set_page_dirty( if (offset < end_offset) set_buffer_dirty(bh); bh = bh->b_this_page; - offset += 1 << inode->i_blkbits; + offset += i_blocksize(inode); } while (bh != head); } /* diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index d14691aa02b4..5d5a5e277f35 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -117,6 +117,7 @@ typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int, unsigned char *, int, int); typedef struct xfs_attr_list_context { + struct xfs_trans *tp; struct xfs_inode *dp; /* inode */ struct attrlist_cursor_kern *cursor; /* position in list */ char *alist; /* output buffer */ @@ -140,8 +141,10 @@ typedef struct xfs_attr_list_context { * Overall external interface routines. */ int xfs_attr_inactive(struct xfs_inode *dp); +int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *); int xfs_attr_list_int(struct xfs_attr_list_context *); int xfs_inode_hasattr(struct xfs_inode *ip); +int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args); int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, unsigned char *value, int *valuelenp, int flags); int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 97c45b6eb91e..7740c8a5e736 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -230,7 +230,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) */ bp = NULL; if (cursor->blkno > 0) { - error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, + error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if ((error != 0) && (error != -EFSCORRUPTED)) return error; @@ -242,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) case XFS_DA_NODE_MAGIC: case XFS_DA3_NODE_MAGIC: trace_xfs_attr_list_wrong_blk(context); - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); bp = NULL; break; case XFS_ATTR_LEAF_MAGIC: @@ -254,18 +254,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) if (cursor->hashval > be32_to_cpu( entries[leafhdr.count - 1].hashval)) { trace_xfs_attr_list_wrong_blk(context); - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); bp = NULL; } else if (cursor->hashval <= be32_to_cpu( entries[0].hashval)) { trace_xfs_attr_list_wrong_blk(context); - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); bp = NULL; } break; default: trace_xfs_attr_list_wrong_blk(context); - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); bp = NULL; } } @@ -279,9 +279,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) if (bp == NULL) { cursor->blkno = 0; for (;;) { - __uint16_t magic; + uint16_t magic; - error = xfs_da3_node_read(NULL, dp, + error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if (error) @@ -297,7 +297,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) XFS_ERRLEVEL_LOW, context->dp->i_mount, node); - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); return -EFSCORRUPTED; } @@ -313,10 +313,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) } } if (i == nodehdr.count) { - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); return 0; } - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); } } ASSERT(bp != NULL); @@ -333,12 +333,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) if (context->seen_enough || leafhdr.forw == 0) break; cursor->blkno = leafhdr.forw; - xfs_trans_brelse(NULL, bp); - error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp); + xfs_trans_brelse(context->tp, bp); + error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp); if (error) return error; } - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); return 0; } @@ -448,16 +448,36 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) trace_xfs_attr_leaf_list(context); context->cursor->blkno = 0; - error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp); + error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp); if (error) return error; xfs_attr3_leaf_list_int(bp, context); - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(context->tp, bp); return 0; } int +xfs_attr_list_int_ilocked( + struct xfs_attr_list_context *context) +{ + struct xfs_inode *dp = context->dp; + + ASSERT(xfs_isilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + + /* + * Decide on what work routines to call based on the inode size. + */ + if (!xfs_inode_hasattr(dp)) + return 0; + else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) + return xfs_attr_shortform_list(context); + else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) + return xfs_attr_leaf_list(context); + return xfs_attr_node_list(context); +} + +int xfs_attr_list_int( xfs_attr_list_context_t *context) { @@ -470,19 +490,8 @@ xfs_attr_list_int( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - /* - * Decide on what work routines to call based on the inode size. - */ lock_mode = xfs_ilock_attr_map_shared(dp); - if (!xfs_inode_hasattr(dp)) { - error = 0; - } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - error = xfs_attr_shortform_list(context); - } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { - error = xfs_attr_leaf_list(context); - } else { - error = xfs_attr_node_list(context); - } + error = xfs_attr_list_int_ilocked(context); xfs_iunlock(dp, lock_mode); return error; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 9bf57c76623b..88073910fa5d 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -34,6 +34,8 @@ #include "xfs_bmap.h" #include "xfs_icache.h" #include "xfs_trace.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" kmem_zone_t *xfs_bui_zone; @@ -215,6 +217,7 @@ void xfs_bui_release( struct xfs_bui_log_item *buip) { + ASSERT(atomic_read(&buip->bui_refcount) > 0); if (atomic_dec_and_test(&buip->bui_refcount)) { xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); xfs_bui_item_free(buip); @@ -393,6 +396,7 @@ xfs_bui_recover( struct xfs_map_extent *bmap; xfs_fsblock_t startblock_fsb; xfs_fsblock_t inode_fsb; + xfs_filblks_t count; bool op_ok; struct xfs_bud_log_item *budp; enum xfs_bmap_intent_type type; @@ -401,6 +405,7 @@ xfs_bui_recover( struct xfs_trans *tp; struct xfs_inode *ip = NULL; struct xfs_defer_ops dfops; + struct xfs_bmbt_irec irec; xfs_fsblock_t firstfsb; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -446,7 +451,8 @@ xfs_bui_recover( return -EIO; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); if (error) return error; budp = xfs_trans_get_bud(tp, buip); @@ -477,13 +483,24 @@ xfs_bui_recover( } xfs_trans_ijoin(tp, ip, 0); + count = bmap->me_len; error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, ip, whichfork, bmap->me_startoff, - bmap->me_startblock, bmap->me_len, - state); + bmap->me_startblock, &count, state); if (error) goto err_dfops; + if (count > 0) { + ASSERT(type == XFS_BMAP_UNMAP); + irec.br_startblock = bmap->me_startblock; + irec.br_blockcount = count; + irec.br_startoff = bmap->me_startoff; + irec.br_state = state; + error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); + if (error) + goto err_dfops; + } + /* Finish transaction, free inodes. */ error = xfs_defer_finish(&tp, &dfops, NULL); if (error) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c1417919ab0a..93e955262d07 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -81,14 +81,13 @@ xfs_zero_extent( return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), block << (mp->m_super->s_blocksize_bits - 9), count_fsb << (mp->m_super->s_blocksize_bits - 9), - GFP_NOFS, true); + GFP_NOFS, 0); } int xfs_bmap_rtalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - xfs_alloctype_t atype = 0; /* type for allocation routines */ int error; /* error return value */ xfs_mount_t *mp; /* mount point structure */ xfs_extlen_t prod = 0; /* product factor for allocators */ @@ -155,18 +154,14 @@ xfs_bmap_rtalloc( /* * Realtime allocation, done through xfs_rtallocate_extent. */ - atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; do_div(ap->blkno, mp->m_sb.sb_rextsize); rtb = ap->blkno; ap->length = ralen; - if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, - &ralen, atype, ap->wasdel, prod, &rtb))) - return error; - if (rtb == NULLFSBLOCK && prod > 1 && - (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, - ap->length, &ralen, atype, - ap->wasdel, 1, &rtb))) + error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, + &ralen, ap->wasdel, prod, &rtb); + if (error) return error; + ap->blkno = rtb; if (ap->blkno != NULLFSBLOCK) { ap->blkno *= mp->m_sb.sb_rextsize; @@ -224,20 +219,24 @@ xfs_bmap_eof( */ /* - * Count leaf blocks given a range of extent records. + * Count leaf blocks given a range of extent records. Delayed allocation + * extents are not counted towards the totals. */ STATIC void xfs_bmap_count_leaves( - xfs_ifork_t *ifp, - xfs_extnum_t idx, - int numrecs, - int *count) + struct xfs_ifork *ifp, + xfs_extnum_t *numrecs, + xfs_filblks_t *count) { - int b; - - for (b = 0; b < numrecs; b++) { - xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); - *count += xfs_bmbt_get_blockcount(frp); + xfs_extnum_t i; + xfs_extnum_t nr_exts = xfs_iext_count(ifp); + + for (i = 0; i < nr_exts; i++) { + xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i); + if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) { + (*numrecs)++; + *count += xfs_bmbt_get_blockcount(frp); + } } } @@ -250,7 +249,7 @@ xfs_bmap_disk_count_leaves( struct xfs_mount *mp, struct xfs_btree_block *block, int numrecs, - int *count) + xfs_filblks_t *count) { int b; xfs_bmbt_rec_t *frp; @@ -265,17 +264,18 @@ xfs_bmap_disk_count_leaves( * Recursively walks each level of a btree * to count total fsblocks in use. */ -STATIC int /* error */ +STATIC int xfs_bmap_count_tree( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fsblock_t blockno, /* file system block number */ - int levelin, /* level in btree */ - int *count) /* Count of blocks */ + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_ifork *ifp, + xfs_fsblock_t blockno, + int levelin, + xfs_extnum_t *nextents, + xfs_filblks_t *count) { int error; - xfs_buf_t *bp, *nbp; + struct xfs_buf *bp, *nbp; int level = levelin; __be64 *pp; xfs_fsblock_t bno = blockno; @@ -308,8 +308,9 @@ xfs_bmap_count_tree( /* Dive to the next level */ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); - if (unlikely((error = - xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { + error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents, + count); + if (error) { xfs_trans_brelse(tp, bp); XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", XFS_ERRLEVEL_LOW, mp); @@ -321,6 +322,7 @@ xfs_bmap_count_tree( for (;;) { nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); + (*nextents) += numrecs; xfs_bmap_disk_count_leaves(mp, block, numrecs, count); xfs_trans_brelse(tp, bp); if (nextbno == NULLFSBLOCK) @@ -339,46 +341,64 @@ xfs_bmap_count_tree( } /* - * Count fsblocks of the given fork. + * Count fsblocks of the given fork. Delayed allocation extents are + * not counted towards the totals. */ -static int /* error */ +int xfs_bmap_count_blocks( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - int whichfork, /* data or attr fork */ - int *count) /* out: count of blocks */ + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + xfs_extnum_t *nextents, + xfs_filblks_t *count) { + struct xfs_mount *mp; /* file system mount structure */ + __be64 *pp; /* pointer to block address */ struct xfs_btree_block *block; /* current btree block */ + struct xfs_ifork *ifp; /* fork structure */ xfs_fsblock_t bno; /* block # of "block" */ - xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ + int error; bno = NULLFSBLOCK; mp = ip->i_mount; + *nextents = 0; + *count = 0; ifp = XFS_IFORK_PTR(ip, whichfork); - if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count); + if (!ifp) return 0; - } - /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. - */ - block = ifp->if_broot; - level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - ASSERT(bno != NULLFSBLOCK); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); - - if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { - XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, - mp); - return -EFSCORRUPTED; + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_EXTENTS: + xfs_bmap_count_leaves(ifp, nextents, count); + return 0; + case XFS_DINODE_FMT_BTREE: + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + /* + * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. + */ + block = ifp->if_broot; + level = be16_to_cpu(block->bb_level); + ASSERT(level > 0); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); + ASSERT(bno != NULLFSBLOCK); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + + error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, + nextents, count); + if (error) { + XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", + XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + return 0; } return 0; @@ -394,11 +414,11 @@ xfs_getbmapx_fix_eof_hole( struct getbmapx *out, /* output structure */ int prealloced, /* this is a file with * preallocated data space */ - __int64_t end, /* last block requested */ + int64_t end, /* last block requested */ xfs_fsblock_t startblock, bool moretocome) { - __int64_t fixlen; + int64_t fixlen; xfs_mount_t *mp; /* file system mount point */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_extnum_t lastx; /* last extent pointer */ @@ -453,16 +473,15 @@ xfs_getbmap_adjust_shared( next_map->br_blockcount = 0; /* Only written data blocks can be shared. */ - if (!xfs_is_reflink_inode(ip) || whichfork != XFS_DATA_FORK || - map->br_startblock == DELAYSTARTBLOCK || - map->br_startblock == HOLESTARTBLOCK || - ISUNWRITTEN(map)) + if (!xfs_is_reflink_inode(ip) || + whichfork != XFS_DATA_FORK || + !xfs_bmap_is_real_extent(map)) return 0; agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); - error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount, - &ebno, &elen, true); + error = xfs_reflink_find_shared(mp, NULL, agno, agbno, + map->br_blockcount, &ebno, &elen, true); if (error) return error; @@ -520,9 +539,9 @@ xfs_getbmap( xfs_bmap_format_t formatter, /* format to user */ void *arg) /* formatter arg */ { - __int64_t bmvend; /* last block requested */ + int64_t bmvend; /* last block requested */ int error = 0; /* return value */ - __int64_t fixlen; /* length for -1 case */ + int64_t fixlen; /* length for -1 case */ int i; /* extent number */ int lock; /* lock state */ xfs_bmbt_irec_t *map; /* buffer for user's data */ @@ -588,9 +607,13 @@ xfs_getbmap( } break; default: + /* Local format data forks report no extents. */ + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + bmv->bmv_entries = 0; + return 0; + } if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE && - ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + ip->i_d.di_format != XFS_DINODE_FMT_BTREE) return -EINVAL; if (xfs_get_extsz_hint(ip) || @@ -607,7 +630,7 @@ xfs_getbmap( if (bmv->bmv_length == -1) { fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); bmv->bmv_length = - max_t(__int64_t, fixlen - bmv->bmv_offset, 0); + max_t(int64_t, fixlen - bmv->bmv_offset, 0); } else if (bmv->bmv_length == 0) { bmv->bmv_entries = 0; return 0; @@ -718,7 +741,7 @@ xfs_getbmap( * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) + map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && @@ -744,7 +767,7 @@ xfs_getbmap( out[cur_ext].bmv_offset + out[cur_ext].bmv_length; bmv->bmv_length = - max_t(__int64_t, 0, bmvend - bmv->bmv_offset); + max_t(int64_t, 0, bmvend - bmv->bmv_offset); /* * In case we don't want to return the hole, @@ -787,11 +810,9 @@ xfs_getbmap( xfs_iunlock(ip, XFS_IOLOCK_SHARED); for (i = 0; i < cur_ext; i++) { - int full = 0; /* user array is full */ - /* format results & advance arg */ - error = formatter(&arg, &out[i], &full); - if (error || full) + error = formatter(&arg, &out[i]); + if (error) break; } @@ -911,23 +932,22 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) } /* - * This is called by xfs_inactive to free any blocks beyond eof - * when the link count isn't zero and by xfs_dm_punch_hole() when - * punching a hole to EOF. + * This is called to free any blocks beyond eof. The caller must hold + * IOLOCK_EXCL unless we are in the inode reclaim path and have the only + * reference to the inode. */ int xfs_free_eofblocks( - xfs_mount_t *mp, - xfs_inode_t *ip, - bool need_iolock) + struct xfs_inode *ip) { - xfs_trans_t *tp; - int error; - xfs_fileoff_t end_fsb; - xfs_fileoff_t last_fsb; - xfs_filblks_t map_len; - int nimaps; - xfs_bmbt_irec_t imap; + struct xfs_trans *tp; + int error; + xfs_fileoff_t end_fsb; + xfs_fileoff_t last_fsb; + xfs_filblks_t map_len; + int nimaps; + struct xfs_bmbt_irec imap; + struct xfs_mount *mp = ip->i_mount; /* * Figure out if there are any blocks beyond the end @@ -944,6 +964,10 @@ xfs_free_eofblocks( error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); + /* + * If there are blocks after the end of file, truncate the file to its + * current size to free them up. + */ if (!error && (nimaps != 0) && (imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks)) { @@ -954,22 +978,13 @@ xfs_free_eofblocks( if (error) return error; - /* - * There are blocks after the end of file. - * Free them up now by truncating the file to - * its current size. - */ - if (need_iolock) { - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) - return -EAGAIN; - } + /* wait on dio to ensure i_size has settled */ + inode_dio_wait(VFS_I(ip)); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -997,8 +1012,6 @@ xfs_free_eofblocks( } xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); } return error; } @@ -1222,11 +1235,8 @@ xfs_adjust_extent_unmap_boundaries( return error; if (nimap && imap.br_startblock != HOLESTARTBLOCK) { - xfs_daddr_t block; - ASSERT(imap.br_startblock != DELAYSTARTBLOCK); - block = imap.br_startblock; - mod = do_div(block, mp->m_sb.sb_rextsize); + mod = do_mod(imap.br_startblock, mp->m_sb.sb_rextsize); if (mod) *startoffset_fsb += mp->m_sb.sb_rextsize - mod; } @@ -1324,8 +1334,16 @@ xfs_free_file_space( /* * Now that we've unmap all full blocks we'll have to zero out any * partial block at the beginning and/or end. xfs_zero_range is - * smart enough to skip any holes, including those we just created. + * smart enough to skip any holes, including those we just created, + * but we must take care not to zero beyond EOF and enlarge i_size. */ + + if (offset >= XFS_ISIZE(ip)) + return 0; + + if (offset + len > XFS_ISIZE(ip)) + len = XFS_ISIZE(ip) - offset; + return xfs_zero_range(ip, offset, len, NULL); } @@ -1393,10 +1411,16 @@ xfs_shift_file_space( xfs_fileoff_t stop_fsb; xfs_fileoff_t next_fsb; xfs_fileoff_t shift_fsb; + uint resblks; ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); if (direction == SHIFT_LEFT) { + /* + * Reserve blocks to cover potential extent merges after left + * shift operations. + */ + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); next_fsb = XFS_B_TO_FSB(mp, offset + len); stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); } else { @@ -1404,6 +1428,7 @@ xfs_shift_file_space( * If right shift, delegate the work of initialization of * next_fsb to xfs_bmap_shift_extent as it has ilock held. */ + resblks = 0; next_fsb = NULLFSBLOCK; stop_fsb = XFS_B_TO_FSB(mp, offset); } @@ -1415,7 +1440,7 @@ xfs_shift_file_space( * into the accessible region of the file. */ if (xfs_can_free_eofblocks(ip, true)) { - error = xfs_free_eofblocks(mp, ip, false); + error = xfs_free_eofblocks(ip); if (error) return error; } @@ -1445,21 +1470,14 @@ xfs_shift_file_space( } while (!error && !done) { - /* - * We would need to reserve permanent block for transaction. - * This will come into picture when after shifting extent into - * hole we found that adjacent extents can be merged which - * may lead to freeing of a block during record update. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, + &tp); if (error) break; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, - ip->i_gdquot, ip->i_pdquot, - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, + ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto out_trans_cancel; @@ -1624,7 +1642,7 @@ xfs_swap_extents_check_format( * extent format... */ if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - if (XFS_IFORK_BOFF(ip) && + if (XFS_IFORK_Q(ip) && XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) return -EINVAL; if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= @@ -1634,7 +1652,7 @@ xfs_swap_extents_check_format( /* Reciprocal target->temp btree format checks */ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - if (XFS_IFORK_BOFF(tip) && + if (XFS_IFORK_Q(tip) && XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) return -EINVAL; if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= @@ -1683,7 +1701,7 @@ xfs_swap_extent_rmap( xfs_filblks_t ilen; xfs_filblks_t rlen; int nimaps; - __uint64_t tip_flags2; + uint64_t tip_flags2; /* * If the source file has shared blocks, we must flag the donor @@ -1796,10 +1814,11 @@ xfs_swap_extent_forks( int *target_log_flags) { struct xfs_ifork tempifp, *ifp, *tifp; - int aforkblks = 0; - int taforkblks = 0; + xfs_filblks_t aforkblks = 0; + xfs_filblks_t taforkblks = 0; + xfs_extnum_t junk; xfs_extnum_t nextents; - __uint64_t tmp; + uint64_t tmp; int error; /* @@ -1807,14 +1826,14 @@ xfs_swap_extent_forks( */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { - error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, + error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk, &aforkblks); if (error) return error; } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { - error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, + error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk, &taforkblks); if (error) return error; @@ -1857,15 +1876,15 @@ xfs_swap_extent_forks( /* * Fix the on-disk inode values */ - tmp = (__uint64_t)ip->i_d.di_nblocks; + tmp = (uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; - tmp = (__uint64_t) ip->i_d.di_nextents; + tmp = (uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; - tmp = (__uint64_t) ip->i_d.di_format; + tmp = (uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; @@ -1934,7 +1953,7 @@ xfs_swap_extents( int error = 0; int lock_flags; struct xfs_ifork *cowfp; - __uint64_t f; + uint64_t f; int resblks; /* diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 68a621a8e0c0..0cede1043571 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -35,7 +35,7 @@ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, xfs_fileoff_t start_fsb, xfs_fileoff_t length); /* bmap to userspace formatter - copy to user & advance pointer */ -typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); +typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *); int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, xfs_bmap_format_t formatter, void *arg); @@ -63,12 +63,15 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, /* EOF block manipulation functions */ bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); -int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, - bool need_iolock); +int xfs_free_eofblocks(struct xfs_inode *ip); int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, struct xfs_swapext *sx); xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb); +int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, xfs_extnum_t *nextents, + xfs_filblks_t *count); + #endif /* __XFS_BMAP_UTIL_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index ac3b4db519df..72f038492ba8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -33,6 +33,7 @@ #include <linux/migrate.h> #include <linux/backing-dev.h> #include <linux/freezer.h> +#include <linux/sched/mm.h> #include "xfs_format.h" #include "xfs_log_format.h" @@ -96,12 +97,16 @@ static inline void xfs_buf_ioacct_inc( struct xfs_buf *bp) { - if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT)) + if (bp->b_flags & XBF_NO_IOACCT) return; ASSERT(bp->b_flags & XBF_ASYNC); - bp->b_flags |= _XBF_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); + spin_lock(&bp->b_lock); + if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { + bp->b_state |= XFS_BSTATE_IN_FLIGHT; + percpu_counter_inc(&bp->b_target->bt_io_count); + } + spin_unlock(&bp->b_lock); } /* @@ -109,14 +114,24 @@ xfs_buf_ioacct_inc( * freed and unaccount from the buftarg. */ static inline void -xfs_buf_ioacct_dec( +__xfs_buf_ioacct_dec( struct xfs_buf *bp) { - if (!(bp->b_flags & _XBF_IN_FLIGHT)) - return; + lockdep_assert_held(&bp->b_lock); + + if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { + bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; + percpu_counter_dec(&bp->b_target->bt_io_count); + } +} - bp->b_flags &= ~_XBF_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); +static inline void +xfs_buf_ioacct_dec( + struct xfs_buf *bp) +{ + spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + spin_unlock(&bp->b_lock); } /* @@ -148,9 +163,9 @@ xfs_buf_stale( * unaccounted (released to LRU) before that occurs. Drop in-flight * status now to preserve accounting consistency. */ - xfs_buf_ioacct_dec(bp); - spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) @@ -442,17 +457,17 @@ _xfs_buf_map_pages( bp->b_addr = NULL; } else { int retried = 0; - unsigned noio_flag; + unsigned nofs_flag; /* * vm_map_ram() will allocate auxillary structures (e.g. * pagetables) with GFP_KERNEL, yet we are likely to be under * GFP_NOFS context here. Hence we need to tell memory reclaim - * that we are in such a context via PF_MEMALLOC_NOIO to prevent + * that we are in such a context via PF_MEMALLOC_NOFS to prevent * memory reclaim re-entering the filesystem here and * potentially deadlocking. */ - noio_flag = memalloc_noio_save(); + nofs_flag = memalloc_nofs_save(); do { bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, -1, PAGE_KERNEL); @@ -460,7 +475,7 @@ _xfs_buf_map_pages( break; vm_unmap_aliases(); } while (retried++ <= 1); - memalloc_noio_restore(noio_flag); + memalloc_nofs_restore(nofs_flag); if (!bp->b_addr) return -ENOMEM; @@ -758,7 +773,7 @@ xfs_buf_readahead_map( int nmaps, const struct xfs_buf_ops *ops) { - if (bdi_read_congested(target->bt_bdi)) + if (bdi_read_congested(target->bt_bdev->bd_bdi)) return; xfs_buf_read_map(target, map, nmaps, @@ -978,12 +993,12 @@ xfs_buf_rele( * ensures the decrement occurs only once per-buf. */ if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); goto out_unlock; } /* the last reference has been dropped ... */ - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU take a new reference to the @@ -1078,6 +1093,8 @@ void xfs_buf_unlock( struct xfs_buf *bp) { + ASSERT(xfs_buf_islocked(bp)); + XB_CLEAR_OWNER(bp); up(&bp->b_sema); @@ -1177,7 +1194,7 @@ xfs_buf_ioerror_alert( { xfs_alert(bp->b_target->bt_mount, "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", - (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); + (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); } int @@ -1210,8 +1227,11 @@ xfs_buf_bio_end_io( * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. */ - if (bio->bi_error) - cmpxchg(&bp->b_io_error, 0, bio->bi_error); + if (bio->bi_status) { + int error = blk_status_to_errno(bio->bi_status); + + cmpxchg(&bp->b_io_error, 0, error); + } if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); @@ -1791,7 +1811,6 @@ xfs_alloc_buftarg( btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; - btp->bt_bdi = blk_get_backing_dev_info(bdev); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; @@ -1815,6 +1834,28 @@ error: } /* + * Cancel a delayed write list. + * + * Remove each buffer from the list, clear the delwri queue flag and drop the + * associated buffer reference. + */ +void +xfs_buf_delwri_cancel( + struct list_head *list) +{ + struct xfs_buf *bp; + + while (!list_empty(list)) { + bp = list_first_entry(list, struct xfs_buf, b_list); + + xfs_buf_lock(bp); + bp->b_flags &= ~_XBF_DELWRI_Q; + list_del_init(&bp->b_list); + xfs_buf_relse(bp); + } +} + +/* * Add a buffer to the delayed write list. * * This queues a buffer for writeout if it hasn't already been. Note that @@ -2009,6 +2050,66 @@ xfs_buf_delwri_submit( return error; } +/* + * Push a single buffer on a delwri queue. + * + * The purpose of this function is to submit a single buffer of a delwri queue + * and return with the buffer still on the original queue. The waiting delwri + * buffer submission infrastructure guarantees transfer of the delwri queue + * buffer reference to a temporary wait list. We reuse this infrastructure to + * transfer the buffer back to the original queue. + * + * Note the buffer transitions from the queued state, to the submitted and wait + * listed state and back to the queued state during this call. The buffer + * locking and queue management logic between _delwri_pushbuf() and + * _delwri_queue() guarantee that the buffer cannot be queued to another list + * before returning. + */ +int +xfs_buf_delwri_pushbuf( + struct xfs_buf *bp, + struct list_head *buffer_list) +{ + LIST_HEAD (submit_list); + int error; + + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); + + /* + * Isolate the buffer to a new local list so we can submit it for I/O + * independently from the rest of the original list. + */ + xfs_buf_lock(bp); + list_move(&bp->b_list, &submit_list); + xfs_buf_unlock(bp); + + /* + * Delwri submission clears the DELWRI_Q buffer flag and returns with + * the buffer on the wait list with an associated reference. Rather than + * bounce the buffer from a local wait list back to the original list + * after I/O completion, reuse the original list as the wait list. + */ + xfs_buf_delwri_submit_buffers(&submit_list, buffer_list); + + /* + * The buffer is now under I/O and wait listed as during typical delwri + * submission. Lock the buffer to wait for I/O completion. Rather than + * remove the buffer from the wait list and release the reference, we + * want to return with the buffer queued to the original list. The + * buffer already sits on the original list with a wait list reference, + * however. If we let the queue inherit that wait list reference, all we + * need to do is reset the DELWRI_Q flag. + */ + xfs_buf_lock(bp); + error = bp->b_error; + bp->b_flags |= _XBF_DELWRI_Q; + xfs_buf_unlock(bp); + + return error; +} + int __init xfs_buf_init(void) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8a9d3a9599f0..20721261dae5 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -63,7 +63,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */ typedef unsigned int xfs_buf_flags_t; @@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_IN_FLIGHT, "IN_FLIGHT" } + { _XBF_COMPOUND, "COMPOUND" } /* * Internal state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ /* * The xfs_buftarg contains 2 notions of "sector size" - @@ -109,7 +108,6 @@ typedef unsigned int xfs_buf_flags_t; typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; - struct backing_dev_info *bt_bdi; struct xfs_mount *bt_mount; unsigned int bt_meta_sectorsize; size_t bt_meta_sectormask; @@ -292,7 +290,6 @@ xfs_buf_readahead( return xfs_buf_readahead_map(target, &map, 1, ops); } -struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks); void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); @@ -331,9 +328,11 @@ extern void *xfs_buf_offset(struct xfs_buf *, size_t); extern void xfs_buf_stale(struct xfs_buf *bp); /* Delayed Write Buffer Routines */ +extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); +extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); /* Buffer Daemon Setup Routines */ extern int xfs_buf_init(void); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 2975cb2319f4..f6a8422e9562 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -636,20 +636,23 @@ xfs_buf_item_unlock( /* * Clean buffers, by definition, cannot be in the AIL. However, aborted - * buffers may be dirty and hence in the AIL. Therefore if we are - * aborting a buffer and we've just taken the last refernce away, we - * have to check if it is in the AIL before freeing it. We need to free - * it in this case, because an aborted transaction has already shut the - * filesystem down and this is the last chance we will have to do so. + * buffers may be in the AIL regardless of dirty state. An aborted + * transaction that invalidates a buffer already in the AIL may have + * marked it stale and cleared the dirty state, for example. + * + * Therefore if we are aborting a buffer and we've just taken the last + * reference away, we have to check if it is in the AIL before freeing + * it. We need to free it in this case, because an aborted transaction + * has already shut the filesystem down and this is the last chance we + * will have to do so. */ if (atomic_dec_and_test(&bip->bli_refcount)) { - if (clean) - xfs_buf_item_relse(bp); - else if (aborted) { + if (aborted) { ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); - } + } else if (clean) + xfs_buf_item_relse(bp); } if (!(flags & XFS_BLI_HOLD)) @@ -1162,6 +1165,7 @@ xfs_buf_iodone_callbacks( */ bp->b_last_error = 0; bp->b_retries = 0; + bp->b_first_retry_time = 0; xfs_buf_do_callbacks(bp); bp->b_fspriv = NULL; diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 003a99b83bd8..ba2638d37031 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -44,7 +44,7 @@ static unsigned char xfs_dir3_filetype_table[] = { static unsigned char xfs_dir3_get_dtype( struct xfs_mount *mp, - __uint8_t filetype) + uint8_t filetype) { if (!xfs_sb_version_hasftype(&mp->m_sb)) return DT_UNKNOWN; @@ -71,22 +71,11 @@ xfs_dir2_sf_getdents( struct xfs_da_geometry *geo = args->geo; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Give up if the directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; - /* * If the block number in the offset is out of range, we're done. */ @@ -128,7 +117,7 @@ xfs_dir2_sf_getdents( */ sfep = xfs_dir2_sf_firstentry(sfp); for (i = 0; i < sfp->count; i++) { - __uint8_t filetype; + uint8_t filetype; off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, xfs_dir2_sf_get_offset(sfep)); @@ -181,7 +170,7 @@ xfs_dir2_block_getdents( return 0; lock_mode = xfs_ilock_data_map_shared(dp); - error = xfs_dir3_block_read(NULL, dp, &bp); + error = xfs_dir3_block_read(args->trans, dp, &bp); xfs_iunlock(dp, lock_mode); if (error) return error; @@ -205,7 +194,7 @@ xfs_dir2_block_getdents( * Each object is a real entry (dep) or an unused one (dup). */ while (ptr < endptr) { - __uint8_t filetype; + uint8_t filetype; dup = (xfs_dir2_data_unused_t *)ptr; /* @@ -239,7 +228,7 @@ xfs_dir2_block_getdents( if (!dir_emit(ctx, (char *)dep->name, dep->namelen, be64_to_cpu(dep->inumber), xfs_dir3_get_dtype(dp->i_mount, filetype))) { - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(args->trans, bp); return 0; } } @@ -250,211 +239,104 @@ xfs_dir2_block_getdents( */ ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & 0x7fffffff; - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(args->trans, bp); return 0; } -struct xfs_dir2_leaf_map_info { - xfs_extlen_t map_blocks; /* number of fsbs in map */ - xfs_dablk_t map_off; /* last mapped file offset */ - int map_size; /* total entries in *map */ - int map_valid; /* valid entries in *map */ - int nmap; /* mappings to ask xfs_bmapi */ - xfs_dir2_db_t curdb; /* db for current block */ - int ra_current; /* number of read-ahead blks */ - int ra_index; /* *map index for read-ahead */ - int ra_offset; /* map entry offset for ra */ - int ra_want; /* readahead count wanted */ - struct xfs_bmbt_irec map[]; /* map vector for blocks */ -}; - +/* + * Read a directory block and initiate readahead for blocks beyond that. + * We maintain a sliding readahead window of the remaining space in the + * buffer rounded up to the nearest block. + */ STATIC int xfs_dir2_leaf_readbuf( struct xfs_da_args *args, size_t bufsize, - struct xfs_dir2_leaf_map_info *mip, - xfs_dir2_off_t *curoff, - struct xfs_buf **bpp, - bool trim_map) + xfs_dir2_off_t *cur_off, + xfs_dablk_t *ra_blk, + struct xfs_buf **bpp) { struct xfs_inode *dp = args->dp; struct xfs_buf *bp = NULL; - struct xfs_bmbt_irec *map = mip->map; + struct xfs_da_geometry *geo = args->geo; + struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); + struct xfs_bmbt_irec map; struct blk_plug plug; + xfs_dir2_off_t new_off; + xfs_dablk_t next_ra; + xfs_dablk_t map_off; + xfs_dablk_t last_da; + xfs_extnum_t idx; + int ra_want; int error = 0; - int length; - int i; - int j; - struct xfs_da_geometry *geo = args->geo; - - /* - * If the caller just finished processing a buffer, it will tell us - * we need to trim that block out of the mapping now it is done. - */ - if (trim_map) { - mip->map_blocks -= geo->fsbcount; - /* - * Loop to get rid of the extents for the - * directory block. - */ - for (i = geo->fsbcount; i > 0; ) { - j = min_t(int, map->br_blockcount, i); - map->br_blockcount -= j; - map->br_startblock += j; - map->br_startoff += j; - /* - * If mapping is done, pitch it from - * the table. - */ - if (!map->br_blockcount && --mip->map_valid) - memmove(&map[0], &map[1], - sizeof(map[0]) * mip->map_valid); - i -= j; - } - } - - /* - * Recalculate the readahead blocks wanted. - */ - mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1; - ASSERT(mip->ra_want >= 0); - - /* - * If we don't have as many as we want, and we haven't - * run out of data blocks, get some more mappings. - */ - if (1 + mip->ra_want > mip->map_blocks && - mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) { - /* - * Get more bmaps, fill in after the ones - * we already have in the table. - */ - mip->nmap = mip->map_size - mip->map_valid; - error = xfs_bmapi_read(dp, mip->map_off, - xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) - - mip->map_off, - &map[mip->map_valid], &mip->nmap, 0); - /* - * Don't know if we should ignore this or try to return an - * error. The trouble with returning errors is that readdir - * will just stop without actually passing the error through. - */ + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(args->trans, dp, XFS_DATA_FORK); if (error) - goto out; /* XXX */ - - /* - * If we got all the mappings we asked for, set the final map - * offset based on the last bmap value received. Otherwise, - * we've reached the end. - */ - if (mip->nmap == mip->map_size - mip->map_valid) { - i = mip->map_valid + mip->nmap - 1; - mip->map_off = map[i].br_startoff + map[i].br_blockcount; - } else - mip->map_off = xfs_dir2_byte_to_da(geo, - XFS_DIR2_LEAF_OFFSET); - - /* - * Look for holes in the mapping, and eliminate them. Count up - * the valid blocks. - */ - for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) { - if (map[i].br_startblock == HOLESTARTBLOCK) { - mip->nmap--; - length = mip->map_valid + mip->nmap - i; - if (length) - memmove(&map[i], &map[i + 1], - sizeof(map[i]) * length); - } else { - mip->map_blocks += map[i].br_blockcount; - i++; - } - } - mip->map_valid += mip->nmap; + goto out; } /* - * No valid mappings, so no more data blocks. + * Look for mapped directory blocks at or above the current offset. + * Truncate down to the nearest directory block to start the scanning + * operation. */ - if (!mip->map_valid) { - *curoff = xfs_dir2_da_to_byte(geo, mip->map_off); + last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); + map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off)); + if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map)) goto out; - } + if (map.br_startoff >= last_da) + goto out; + xfs_trim_extent(&map, map_off, last_da - map_off); - /* - * Read the directory block starting at the first mapping. - */ - mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff); - error = xfs_dir3_data_read(NULL, dp, map->br_startoff, - map->br_blockcount >= geo->fsbcount ? - XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) : - -1, &bp); - /* - * Should just skip over the data block instead of giving up. - */ + /* Read the directory block of that first mapping. */ + new_off = xfs_dir2_da_to_byte(geo, map.br_startoff); + if (new_off > *cur_off) + *cur_off = new_off; + error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp); if (error) - goto out; /* XXX */ - - /* - * Adjust the current amount of read-ahead: we just read a block that - * was previously ra. - */ - if (mip->ra_current) - mip->ra_current -= geo->fsbcount; + goto out; /* - * Do we need more readahead? + * Start readahead for the next bufsize's worth of dir data blocks. + * We may have already issued readahead for some of that range; + * ra_blk tracks the last block we tried to read(ahead). */ + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); + if (*ra_blk >= last_da) + goto out; + else if (*ra_blk == 0) + *ra_blk = map.br_startoff; + next_ra = map.br_startoff + geo->fsbcount; + if (next_ra >= last_da) + goto out_no_ra; + if (map.br_blockcount < geo->fsbcount && + !xfs_iext_get_extent(ifp, ++idx, &map)) + goto out_no_ra; + if (map.br_startoff >= last_da) + goto out_no_ra; + xfs_trim_extent(&map, next_ra, last_da - next_ra); + + /* Start ra for each dir (not fs) block that has a mapping. */ blk_start_plug(&plug); - for (mip->ra_index = mip->ra_offset = i = 0; - mip->ra_want > mip->ra_current && i < mip->map_blocks; - i += geo->fsbcount) { - ASSERT(mip->ra_index < mip->map_valid); - /* - * Read-ahead a contiguous directory block. - */ - if (i > mip->ra_current && - map[mip->ra_index].br_blockcount >= geo->fsbcount) { - xfs_dir3_data_readahead(dp, - map[mip->ra_index].br_startoff + mip->ra_offset, - XFS_FSB_TO_DADDR(dp->i_mount, - map[mip->ra_index].br_startblock + - mip->ra_offset)); - mip->ra_current = i; - } - - /* - * Read-ahead a non-contiguous directory block. This doesn't - * use our mapping, but this is a very rare case. - */ - else if (i > mip->ra_current) { - xfs_dir3_data_readahead(dp, - map[mip->ra_index].br_startoff + - mip->ra_offset, -1); - mip->ra_current = i; - } - - /* - * Advance offset through the mapping table. - */ - for (j = 0; j < geo->fsbcount; j += length ) { - /* - * The rest of this extent but not more than a dir - * block. - */ - length = min_t(int, geo->fsbcount, - map[mip->ra_index].br_blockcount - - mip->ra_offset); - mip->ra_offset += length; - - /* - * Advance to the next mapping if this one is used up. - */ - if (mip->ra_offset == map[mip->ra_index].br_blockcount) { - mip->ra_offset = 0; - mip->ra_index++; + while (ra_want > 0) { + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); + while (ra_want > 0 && + next_ra < map.br_startoff + map.br_blockcount) { + if (next_ra >= last_da) { + *ra_blk = last_da; + break; + } + if (next_ra > *ra_blk) { + xfs_dir3_data_readahead(dp, next_ra, -2); + *ra_blk = next_ra; } + ra_want -= geo->fsbcount; + next_ra += geo->fsbcount; + } + if (!xfs_iext_get_extent(ifp, ++idx, &map)) { + *ra_blk = last_da; + break; } } blk_finish_plug(&plug); @@ -462,6 +344,9 @@ xfs_dir2_leaf_readbuf( out: *bpp = bp; return error; +out_no_ra: + *ra_blk = last_da; + goto out; } /* @@ -479,14 +364,14 @@ xfs_dir2_leaf_getdents( xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ - int error = 0; /* error return value */ - int length; /* temporary length value */ - int byteoff; /* offset in current block */ - xfs_dir2_off_t curoff; /* current overall offset */ - xfs_dir2_off_t newoff; /* new curoff after new blk */ char *ptr = NULL; /* pointer to current data */ - struct xfs_dir2_leaf_map_info *map_info; struct xfs_da_geometry *geo = args->geo; + xfs_dablk_t rablk = 0; /* current readahead block */ + xfs_dir2_off_t curoff; /* current overall offset */ + int length; /* temporary length value */ + int byteoff; /* offset in current block */ + int lock_mode; + int error = 0; /* error return value */ /* * If the offset is at or past the largest allowed value, @@ -496,73 +381,35 @@ xfs_dir2_leaf_getdents( return 0; /* - * Set up to bmap a number of blocks based on the caller's - * buffer size, the directory block size, and the filesystem - * block size. - */ - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + - (length * sizeof(struct xfs_bmbt_irec)), - KM_SLEEP | KM_NOFS); - map_info->map_size = length; - - /* * Inside the loop we keep the main offset value as a byte offset * in the directory file. */ curoff = xfs_dir2_dataptr_to_byte(ctx->pos); /* - * Force this conversion through db so we truncate the offset - * down to get the start of the data block. - */ - map_info->map_off = xfs_dir2_db_to_da(geo, - xfs_dir2_byte_to_db(geo, curoff)); - - /* * Loop over directory entries until we reach the end offset. * Get more blocks and readahead as necessary. */ while (curoff < XFS_DIR2_LEAF_OFFSET) { - __uint8_t filetype; + uint8_t filetype; /* * If we have no buffer, or we're off the end of the * current buffer, need to get another one. */ if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { - int lock_mode; - bool trim_map = false; - if (bp) { - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(args->trans, bp); bp = NULL; - trim_map = true; } lock_mode = xfs_ilock_data_map_shared(dp); - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, - &curoff, &bp, trim_map); + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, + &rablk, &bp); xfs_iunlock(dp, lock_mode); - if (error || !map_info->map_valid) + if (error || !bp) break; - /* - * Having done a read, we need to set a new offset. - */ - newoff = xfs_dir2_db_off_to_byte(geo, - map_info->curdb, 0); - /* - * Start of the current block. - */ - if (curoff < newoff) - curoff = newoff; - /* - * Make sure we're in the right block. - */ - else if (curoff > newoff) - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == - map_info->curdb); hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); /* @@ -647,17 +494,22 @@ xfs_dir2_leaf_getdents( ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; else ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; - kmem_free(map_info); if (bp) - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(args->trans, bp); return error; } /* * Read a directory. + * + * If supplied, the transaction collects locked dir buffers to avoid + * nested buffer deadlocks. This function does not dirty the + * transaction. The caller should ensure that the inode is locked + * before calling this function. */ int xfs_readdir( + struct xfs_trans *tp, struct xfs_inode *dp, struct dir_context *ctx, size_t bufsize) @@ -676,6 +528,7 @@ xfs_readdir( args.dp = dp; args.geo = dp->i_mount->m_dir_geo; + args.trans = tp; if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_getdents(&args, ctx); diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 4ff499aa7338..b2cde5426182 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -39,7 +39,7 @@ xfs_trim_extents( xfs_daddr_t start, xfs_daddr_t end, xfs_daddr_t minlen, - __uint64_t *blocks_trimmed) + uint64_t *blocks_trimmed) { struct block_device *bdev = mp->m_ddev_targp->bt_bdev; struct xfs_btree_cur *cur; @@ -132,6 +132,11 @@ next_extent: error = xfs_btree_decrement(cur, 0, &i); if (error) goto out_del_cursor; + + if (fatal_signal_pending(current)) { + error = -ERESTARTSYS; + goto out_del_cursor; + } } out_del_cursor: @@ -161,7 +166,7 @@ xfs_ioc_trim( struct fstrim_range range; xfs_daddr_t start, end, minlen; xfs_agnumber_t start_agno, end_agno, agno; - __uint64_t blocks_trimmed = 0; + uint64_t blocks_trimmed = 0; int error, last_error = 0; if (!capable(CAP_SYS_ADMIN)) @@ -196,8 +201,11 @@ xfs_ioc_trim( for (agno = start_agno; agno <= end_agno; agno++) { error = xfs_trim_extents(mp, agno, start, end, minlen, &blocks_trimmed); - if (error) + if (error) { last_error = error; + if (error == -ERESTARTSYS) + break; + } } if (last_error) @@ -208,32 +216,3 @@ xfs_ioc_trim( return -EFAULT; return 0; } - -int -xfs_discard_extents( - struct xfs_mount *mp, - struct list_head *list) -{ - struct xfs_extent_busy *busyp; - int error = 0; - - list_for_each_entry(busyp, list, list) { - trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, - busyp->length); - - error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, - XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), - XFS_FSB_TO_BB(mp, busyp->length), - GFP_NOFS, 0); - if (error && error != -EOPNOTSUPP) { - xfs_info(mp, - "discard failed for extent [0x%llx,%u], error %d", - (unsigned long long)busyp->bno, - busyp->length, - error); - return error; - } - } - - return 0; -} diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h index 344879aea646..0f070f9e44e1 100644 --- a/fs/xfs/xfs_discard.h +++ b/fs/xfs/xfs_discard.h @@ -5,6 +5,5 @@ struct fstrim_range; struct list_head; extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); -extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); #endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9d06cc30e875..fd2ef8c2c9a7 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -276,7 +276,7 @@ xfs_qm_init_dquot_blk( void xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) { - __uint64_t space; + uint64_t space; dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit); dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit); @@ -695,21 +695,18 @@ error0: */ static int xfs_dq_get_next_id( - xfs_mount_t *mp, + struct xfs_mount *mp, uint type, - xfs_dqid_t *id, - loff_t eof) + xfs_dqid_t *id) { - struct xfs_inode *quotip; + struct xfs_inode *quotip = xfs_quota_inode(mp, type); + xfs_dqid_t next_id = *id + 1; /* simple advance */ + uint lock_flags; + struct xfs_bmbt_irec got; + xfs_extnum_t idx; xfs_fsblock_t start; - loff_t offset; - uint lock; - xfs_dqid_t next_id; int error = 0; - /* Simple advance */ - next_id = *id + 1; - /* If we'd wrap past the max ID, stop */ if (next_id < *id) return -ENOENT; @@ -723,23 +720,25 @@ xfs_dq_get_next_id( /* Nope, next_id is now past the current chunk, so find the next one */ start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk; - quotip = xfs_quota_inode(mp, type); - lock = xfs_ilock_data_map_shared(quotip); - - offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start), - eof, SEEK_DATA); - if (offset < 0) - error = offset; + lock_flags = xfs_ilock_data_map_shared(quotip); + if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK); + if (error) + return error; + } - xfs_iunlock(quotip, lock); + if (xfs_iext_lookup_extent(quotip, "ip->i_df, start, &idx, &got)) { + /* contiguous chunk, bump startoff for the id calculation */ + if (got.br_startoff < start) + got.br_startoff = start; + *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk; + } else { + error = -ENOENT; + } - /* -ENXIO is essentially "no more data" */ - if (error) - return (error == -ENXIO ? -ENOENT: error); + xfs_iunlock(quotip, lock_flags); - /* Convert next data offset back to a quota id */ - *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk; - return 0; + return error; } /* @@ -762,7 +761,6 @@ xfs_qm_dqget( struct xfs_quotainfo *qi = mp->m_quotainfo; struct radix_tree_root *tree = xfs_dquot_tree(qi, type); struct xfs_dquot *dqp; - loff_t eof = 0; int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -790,21 +788,6 @@ xfs_qm_dqget( } #endif - /* Get the end of the quota file if we need it */ - if (flags & XFS_QMOPT_DQNEXT) { - struct xfs_inode *quotip; - xfs_fileoff_t last; - uint lock_mode; - - quotip = xfs_quota_inode(mp, type); - lock_mode = xfs_ilock_data_map_shared(quotip); - error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK); - xfs_iunlock(quotip, lock_mode); - if (error) - return error; - eof = XFS_FSB_TO_B(mp, last); - } - restart: mutex_lock(&qi->qi_tree_lock); dqp = radix_tree_lookup(tree, id); @@ -823,7 +806,7 @@ restart: if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { xfs_dqunlock(dqp); mutex_unlock(&qi->qi_tree_lock); - error = xfs_dq_get_next_id(mp, type, &id, eof); + error = xfs_dq_get_next_id(mp, type, &id); if (error) return error; goto restart; @@ -858,7 +841,7 @@ restart: /* If we are asked to find next active id, keep looking */ if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) { - error = xfs_dq_get_next_id(mp, type, &id, eof); + error = xfs_dq_get_next_id(mp, type, &id); if (!error) goto restart; } @@ -917,7 +900,7 @@ restart: if (flags & XFS_QMOPT_DQNEXT) { if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { xfs_qm_dqput(dqp); - error = xfs_dq_get_next_id(mp, type, &id, eof); + error = xfs_dq_get_next_id(mp, type, &id); if (error) return error; goto restart; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ed7ee4e8af73..2f4feb959bfb 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -22,103 +22,280 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_error.h" +#include "xfs_sysfs.h" #ifdef DEBUG -int xfs_etest[XFS_NUM_INJECT_ERROR]; -int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; -char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; -int xfs_error_test_active; +static unsigned int xfs_errortag_random_default[] = { + XFS_RANDOM_DEFAULT, + XFS_RANDOM_IFLUSH_1, + XFS_RANDOM_IFLUSH_2, + XFS_RANDOM_IFLUSH_3, + XFS_RANDOM_IFLUSH_4, + XFS_RANDOM_IFLUSH_5, + XFS_RANDOM_IFLUSH_6, + XFS_RANDOM_DA_READ_BUF, + XFS_RANDOM_BTREE_CHECK_LBLOCK, + XFS_RANDOM_BTREE_CHECK_SBLOCK, + XFS_RANDOM_ALLOC_READ_AGF, + XFS_RANDOM_IALLOC_READ_AGI, + XFS_RANDOM_ITOBP_INOTOBP, + XFS_RANDOM_IUNLINK, + XFS_RANDOM_IUNLINK_REMOVE, + XFS_RANDOM_DIR_INO_VALIDATE, + XFS_RANDOM_BULKSTAT_READ_CHUNK, + XFS_RANDOM_IODONE_IOERR, + XFS_RANDOM_STRATREAD_IOERR, + XFS_RANDOM_STRATCMPL_IOERR, + XFS_RANDOM_DIOWRITE_IOERR, + XFS_RANDOM_BMAPIFORMAT, + XFS_RANDOM_FREE_EXTENT, + XFS_RANDOM_RMAP_FINISH_ONE, + XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE, + XFS_RANDOM_REFCOUNT_FINISH_ONE, + XFS_RANDOM_BMAP_FINISH_ONE, + XFS_RANDOM_AG_RESV_CRITICAL, + XFS_RANDOM_DROP_WRITES, + XFS_RANDOM_LOG_BAD_CRC, +}; -int -xfs_error_test(int error_tag, int *fsidp, char *expression, - int line, char *file, unsigned long randfactor) +struct xfs_errortag_attr { + struct attribute attr; + unsigned int tag; +}; + +static inline struct xfs_errortag_attr * +to_attr(struct attribute *attr) { - int i; - int64_t fsid; + return container_of(attr, struct xfs_errortag_attr, attr); +} - if (prandom_u32() % randfactor) - return 0; +static inline struct xfs_mount * +to_mp(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); - memcpy(&fsid, fsidp, sizeof(xfs_fsid_t)); + return container_of(kobj, struct xfs_mount, m_errortag_kobj); +} + +STATIC ssize_t +xfs_errortag_attr_store( + struct kobject *kobject, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xfs_mount *mp = to_mp(kobject); + struct xfs_errortag_attr *xfs_attr = to_attr(attr); + int ret; + unsigned int val; - for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { - if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { - xfs_warn(NULL, - "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", - expression, file, line, xfs_etest_fsname[i]); - return 1; - } + if (strcmp(buf, "default") == 0) { + val = xfs_errortag_random_default[xfs_attr->tag]; + } else { + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; } - return 0; + ret = xfs_errortag_set(mp, xfs_attr->tag, val); + if (ret) + return ret; + return count; } +STATIC ssize_t +xfs_errortag_attr_show( + struct kobject *kobject, + struct attribute *attr, + char *buf) +{ + struct xfs_mount *mp = to_mp(kobject); + struct xfs_errortag_attr *xfs_attr = to_attr(attr); + + return snprintf(buf, PAGE_SIZE, "%u\n", + xfs_errortag_get(mp, xfs_attr->tag)); +} + +static const struct sysfs_ops xfs_errortag_sysfs_ops = { + .show = xfs_errortag_attr_show, + .store = xfs_errortag_attr_store, +}; + +#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \ +static struct xfs_errortag_attr xfs_errortag_attr_##_name = { \ + .attr = {.name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) }, \ + .tag = (_tag), \ +} + +#define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr + +XFS_ERRORTAG_ATTR_RW(noerror, XFS_ERRTAG_NOERROR); +XFS_ERRORTAG_ATTR_RW(iflush1, XFS_ERRTAG_IFLUSH_1); +XFS_ERRORTAG_ATTR_RW(iflush2, XFS_ERRTAG_IFLUSH_2); +XFS_ERRORTAG_ATTR_RW(iflush3, XFS_ERRTAG_IFLUSH_3); +XFS_ERRORTAG_ATTR_RW(iflush4, XFS_ERRTAG_IFLUSH_4); +XFS_ERRORTAG_ATTR_RW(iflush5, XFS_ERRTAG_IFLUSH_5); +XFS_ERRORTAG_ATTR_RW(iflush6, XFS_ERRTAG_IFLUSH_6); +XFS_ERRORTAG_ATTR_RW(dareadbuf, XFS_ERRTAG_DA_READ_BUF); +XFS_ERRORTAG_ATTR_RW(btree_chk_lblk, XFS_ERRTAG_BTREE_CHECK_LBLOCK); +XFS_ERRORTAG_ATTR_RW(btree_chk_sblk, XFS_ERRTAG_BTREE_CHECK_SBLOCK); +XFS_ERRORTAG_ATTR_RW(readagf, XFS_ERRTAG_ALLOC_READ_AGF); +XFS_ERRORTAG_ATTR_RW(readagi, XFS_ERRTAG_IALLOC_READ_AGI); +XFS_ERRORTAG_ATTR_RW(itobp, XFS_ERRTAG_ITOBP_INOTOBP); +XFS_ERRORTAG_ATTR_RW(iunlink, XFS_ERRTAG_IUNLINK); +XFS_ERRORTAG_ATTR_RW(iunlinkrm, XFS_ERRTAG_IUNLINK_REMOVE); +XFS_ERRORTAG_ATTR_RW(dirinovalid, XFS_ERRTAG_DIR_INO_VALIDATE); +XFS_ERRORTAG_ATTR_RW(bulkstat, XFS_ERRTAG_BULKSTAT_READ_CHUNK); +XFS_ERRORTAG_ATTR_RW(logiodone, XFS_ERRTAG_IODONE_IOERR); +XFS_ERRORTAG_ATTR_RW(stratread, XFS_ERRTAG_STRATREAD_IOERR); +XFS_ERRORTAG_ATTR_RW(stratcmpl, XFS_ERRTAG_STRATCMPL_IOERR); +XFS_ERRORTAG_ATTR_RW(diowrite, XFS_ERRTAG_DIOWRITE_IOERR); +XFS_ERRORTAG_ATTR_RW(bmapifmt, XFS_ERRTAG_BMAPIFORMAT); +XFS_ERRORTAG_ATTR_RW(free_extent, XFS_ERRTAG_FREE_EXTENT); +XFS_ERRORTAG_ATTR_RW(rmap_finish_one, XFS_ERRTAG_RMAP_FINISH_ONE); +XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE); +XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE); +XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE); +XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL); +XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); +XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); + +static struct attribute *xfs_errortag_attrs[] = { + XFS_ERRORTAG_ATTR_LIST(noerror), + XFS_ERRORTAG_ATTR_LIST(iflush1), + XFS_ERRORTAG_ATTR_LIST(iflush2), + XFS_ERRORTAG_ATTR_LIST(iflush3), + XFS_ERRORTAG_ATTR_LIST(iflush4), + XFS_ERRORTAG_ATTR_LIST(iflush5), + XFS_ERRORTAG_ATTR_LIST(iflush6), + XFS_ERRORTAG_ATTR_LIST(dareadbuf), + XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk), + XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk), + XFS_ERRORTAG_ATTR_LIST(readagf), + XFS_ERRORTAG_ATTR_LIST(readagi), + XFS_ERRORTAG_ATTR_LIST(itobp), + XFS_ERRORTAG_ATTR_LIST(iunlink), + XFS_ERRORTAG_ATTR_LIST(iunlinkrm), + XFS_ERRORTAG_ATTR_LIST(dirinovalid), + XFS_ERRORTAG_ATTR_LIST(bulkstat), + XFS_ERRORTAG_ATTR_LIST(logiodone), + XFS_ERRORTAG_ATTR_LIST(stratread), + XFS_ERRORTAG_ATTR_LIST(stratcmpl), + XFS_ERRORTAG_ATTR_LIST(diowrite), + XFS_ERRORTAG_ATTR_LIST(bmapifmt), + XFS_ERRORTAG_ATTR_LIST(free_extent), + XFS_ERRORTAG_ATTR_LIST(rmap_finish_one), + XFS_ERRORTAG_ATTR_LIST(refcount_continue_update), + XFS_ERRORTAG_ATTR_LIST(refcount_finish_one), + XFS_ERRORTAG_ATTR_LIST(bmap_finish_one), + XFS_ERRORTAG_ATTR_LIST(ag_resv_critical), + XFS_ERRORTAG_ATTR_LIST(drop_writes), + XFS_ERRORTAG_ATTR_LIST(log_bad_crc), + NULL, +}; + +struct kobj_type xfs_errortag_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_errortag_sysfs_ops, + .default_attrs = xfs_errortag_attrs, +}; + int -xfs_errortag_add(unsigned int error_tag, xfs_mount_t *mp) +xfs_errortag_init( + struct xfs_mount *mp) { - int i; - int len; - int64_t fsid; + mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, + KM_SLEEP | KM_MAYFAIL); + if (!mp->m_errortag) + return -ENOMEM; - if (error_tag >= XFS_ERRTAG_MAX) - return -EINVAL; + return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, + &mp->m_kobj, "errortag"); +} - memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t)); +void +xfs_errortag_del( + struct xfs_mount *mp) +{ + xfs_sysfs_del(&mp->m_errortag_kobj); + kmem_free(mp->m_errortag); +} - for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { - if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { - xfs_warn(mp, "error tag #%d on", error_tag); - return 0; - } - } +bool +xfs_errortag_test( + struct xfs_mount *mp, + const char *expression, + const char *file, + int line, + unsigned int error_tag) +{ + unsigned int randfactor; - for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { - if (xfs_etest[i] == 0) { - xfs_warn(mp, "Turned on XFS error tag #%d", - error_tag); - xfs_etest[i] = error_tag; - xfs_etest_fsid[i] = fsid; - len = strlen(mp->m_fsname); - xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); - strcpy(xfs_etest_fsname[i], mp->m_fsname); - xfs_error_test_active++; - return 0; - } - } + /* + * To be able to use error injection anywhere, we need to ensure error + * injection mechanism is already initialized. + * + * Code paths like I/O completion can be called before the + * initialization is complete, but be able to inject errors in such + * places is still useful. + */ + if (!mp->m_errortag) + return false; - xfs_warn(mp, "error tag overflow, too many turned on"); + ASSERT(error_tag < XFS_ERRTAG_MAX); + randfactor = mp->m_errortag[error_tag]; + if (!randfactor || prandom_u32() % randfactor) + return false; - return 1; + xfs_warn_ratelimited(mp, +"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", + expression, file, line, mp->m_fsname); + return true; } int -xfs_errortag_clearall(xfs_mount_t *mp, int loud) +xfs_errortag_get( + struct xfs_mount *mp, + unsigned int error_tag) { - int64_t fsid; - int cleared = 0; - int i; - - memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t)); - - - for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { - if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && - xfs_etest[i] != 0) { - cleared = 1; - xfs_warn(mp, "Clearing XFS error tag #%d", - xfs_etest[i]); - xfs_etest[i] = 0; - xfs_etest_fsid[i] = 0LL; - kmem_free(xfs_etest_fsname[i]); - xfs_etest_fsname[i] = NULL; - xfs_error_test_active--; - } - } + if (error_tag >= XFS_ERRTAG_MAX) + return -EINVAL; + + return mp->m_errortag[error_tag]; +} + +int +xfs_errortag_set( + struct xfs_mount *mp, + unsigned int error_tag, + unsigned int tag_value) +{ + if (error_tag >= XFS_ERRTAG_MAX) + return -EINVAL; - if (loud || cleared) - xfs_warn(mp, "Cleared all XFS error tags for filesystem"); + mp->m_errortag[error_tag] = tag_value; + return 0; +} +int +xfs_errortag_add( + struct xfs_mount *mp, + unsigned int error_tag) +{ + if (error_tag >= XFS_ERRTAG_MAX) + return -EINVAL; + + return xfs_errortag_set(mp, error_tag, + xfs_errortag_random_default[error_tag]); +} + +int +xfs_errortag_clearall( + struct xfs_mount *mp) +{ + memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX); return 0; } #endif /* DEBUG */ diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 05f8666733a0..7577be5f09bc 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -96,7 +96,17 @@ extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 #define XFS_ERRTAG_BMAP_FINISH_ONE 26 #define XFS_ERRTAG_AG_RESV_CRITICAL 27 -#define XFS_ERRTAG_MAX 28 +/* + * DEBUG mode instrumentation to test and/or trigger delayed allocation + * block killing in the event of failed writes. When enabled, all + * buffered writes are silenty dropped and handled as if they failed. + * All delalloc blocks in the range of the write (including pre-existing + * delalloc blocks!) are tossed as part of the write failure error + * handling sequence. + */ +#define XFS_ERRTAG_DROP_WRITES 28 +#define XFS_ERRTAG_LOG_BAD_CRC 29 +#define XFS_ERRTAG_MAX 30 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -129,23 +139,29 @@ extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 #define XFS_RANDOM_BMAP_FINISH_ONE 1 #define XFS_RANDOM_AG_RESV_CRITICAL 4 +#define XFS_RANDOM_DROP_WRITES 1 +#define XFS_RANDOM_LOG_BAD_CRC 1 #ifdef DEBUG -extern int xfs_error_test_active; -extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); - -#define XFS_NUM_INJECT_ERROR 10 -#define XFS_TEST_ERROR(expr, mp, tag, rf) \ - ((expr) || (xfs_error_test_active && \ - xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ - (rf)))) +extern int xfs_errortag_init(struct xfs_mount *mp); +extern void xfs_errortag_del(struct xfs_mount *mp); +extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression, + const char *file, int line, unsigned int error_tag); +#define XFS_TEST_ERROR(expr, mp, tag) \ + ((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag))) -extern int xfs_errortag_add(unsigned int error_tag, struct xfs_mount *mp); -extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud); +extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag); +extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag, + unsigned int tag_value); +extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag); +extern int xfs_errortag_clearall(struct xfs_mount *mp); #else -#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) -#define xfs_errortag_add(tag, mp) (ENOSYS) -#define xfs_errortag_clearall(mp, loud) (ENOSYS) +#define xfs_errortag_init(mp) (0) +#define xfs_errortag_del(mp) +#define XFS_TEST_ERROR(expr, mp, tag) (expr) +#define xfs_errortag_set(mp, tag, val) (ENOSYS) +#define xfs_errortag_add(mp, tag) (ENOSYS) +#define xfs_errortag_clearall(mp) (ENOSYS) #endif /* DEBUG */ /* diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 162dc186cf04..77760dbf0242 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -45,18 +45,7 @@ xfs_extent_busy_insert( struct rb_node **rbp; struct rb_node *parent = NULL; - new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL); - if (!new) { - /* - * No Memory! Since it is now not possible to track the free - * block, make this a synchronous transaction to insure that - * the block is not reused before this transaction commits. - */ - trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len); - xfs_trans_set_sync(tp); - return; - } - + new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP); new->agno = agno; new->bno = bno; new->length = len; @@ -345,25 +334,31 @@ restart: * subset of the extent that is not busy. If *rlen is smaller than * args->minlen no suitable extent could be found, and the higher level * code needs to force out the log and retry the allocation. + * + * Return the current busy generation for the AG if the extent is busy. This + * value can be used to wait for at least one of the currently busy extents + * to be cleared. Note that the busy list is not guaranteed to be empty after + * the gen is woken. The state of a specific extent must always be confirmed + * with another call to xfs_extent_busy_trim() before it can be used. */ -void +bool xfs_extent_busy_trim( struct xfs_alloc_arg *args, - xfs_agblock_t bno, - xfs_extlen_t len, - xfs_agblock_t *rbno, - xfs_extlen_t *rlen) + xfs_agblock_t *bno, + xfs_extlen_t *len, + unsigned *busy_gen) { xfs_agblock_t fbno; xfs_extlen_t flen; struct rb_node *rbp; + bool ret = false; - ASSERT(len > 0); + ASSERT(*len > 0); spin_lock(&args->pag->pagb_lock); restart: - fbno = bno; - flen = len; + fbno = *bno; + flen = *len; rbp = args->pag->pagb_tree.rb_node; while (rbp && flen >= args->minlen) { struct xfs_extent_busy *busyp = @@ -515,24 +510,25 @@ restart: flen = fend - fbno; } - spin_unlock(&args->pag->pagb_lock); +out: - if (fbno != bno || flen != len) { - trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, + if (fbno != *bno || flen != *len) { + trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len, fbno, flen); + *bno = fbno; + *len = flen; + *busy_gen = args->pag->pagb_gen; + ret = true; } - *rbno = fbno; - *rlen = flen; - return; + spin_unlock(&args->pag->pagb_lock); + return ret; fail: /* * Return a zero extent length as failure indications. All callers * re-check if the trimmed extent satisfies the minlen requirement. */ - spin_unlock(&args->pag->pagb_lock); - trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0); - *rbno = fbno; - *rlen = 0; + flen = 0; + goto out; } STATIC void @@ -551,6 +547,21 @@ xfs_extent_busy_clear_one( kmem_free(busyp); } +static void +xfs_extent_busy_put_pag( + struct xfs_perag *pag, + bool wakeup) + __releases(pag->pagb_lock) +{ + if (wakeup) { + pag->pagb_gen++; + wake_up_all(&pag->pagb_wait); + } + + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); +} + /* * Remove all extents on the passed in list from the busy extents tree. * If do_discard is set skip extents that need to be discarded, and mark @@ -565,27 +576,76 @@ xfs_extent_busy_clear( struct xfs_extent_busy *busyp, *n; struct xfs_perag *pag = NULL; xfs_agnumber_t agno = NULLAGNUMBER; + bool wakeup = false; list_for_each_entry_safe(busyp, n, list, list) { if (busyp->agno != agno) { - if (pag) { - spin_unlock(&pag->pagb_lock); - xfs_perag_put(pag); - } - pag = xfs_perag_get(mp, busyp->agno); - spin_lock(&pag->pagb_lock); + if (pag) + xfs_extent_busy_put_pag(pag, wakeup); agno = busyp->agno; + pag = xfs_perag_get(mp, agno); + spin_lock(&pag->pagb_lock); + wakeup = false; } if (do_discard && busyp->length && - !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) + !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) { busyp->flags = XFS_EXTENT_BUSY_DISCARDED; - else + } else { xfs_extent_busy_clear_one(mp, pag, busyp); + wakeup = true; + } } - if (pag) { - spin_unlock(&pag->pagb_lock); + if (pag) + xfs_extent_busy_put_pag(pag, wakeup); +} + +/* + * Flush out all busy extents for this AG. + */ +void +xfs_extent_busy_flush( + struct xfs_mount *mp, + struct xfs_perag *pag, + unsigned busy_gen) +{ + DEFINE_WAIT (wait); + int log_flushed = 0, error; + + trace_xfs_log_force(mp, 0, _THIS_IP_); + error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed); + if (error) + return; + + do { + prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); + if (busy_gen != READ_ONCE(pag->pagb_gen)) + break; + schedule(); + } while (1); + + finish_wait(&pag->pagb_wait, &wait); +} + +void +xfs_extent_busy_wait_all( + struct xfs_mount *mp) +{ + DEFINE_WAIT (wait); + xfs_agnumber_t agno; + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + struct xfs_perag *pag = xfs_perag_get(mp, agno); + + do { + prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); + if (RB_EMPTY_ROOT(&pag->pagb_tree)) + break; + schedule(); + } while (1); + finish_wait(&pag->pagb_wait, &wait); + xfs_perag_put(pag); } } @@ -596,9 +656,17 @@ xfs_extent_busy_clear( int xfs_extent_busy_ag_cmp( void *priv, - struct list_head *a, - struct list_head *b) + struct list_head *l1, + struct list_head *l2) { - return container_of(a, struct xfs_extent_busy, list)->agno - - container_of(b, struct xfs_extent_busy, list)->agno; + struct xfs_extent_busy *b1 = + container_of(l1, struct xfs_extent_busy, list); + struct xfs_extent_busy *b2 = + container_of(l2, struct xfs_extent_busy, list); + s32 diff; + + diff = b1->agno - b2->agno; + if (!diff) + diff = b1->bno - b2->bno; + return diff; } diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index bfff284d2dcc..60195ea1b84a 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -58,9 +58,16 @@ void xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); +bool +xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno, + xfs_extlen_t *len, unsigned *busy_gen); + +void +xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, + unsigned busy_gen); + void -xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno, - xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen); +xfs_extent_busy_wait_all(struct xfs_mount *mp); int xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index d7bc14906af8..44f8c5451210 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -290,6 +290,7 @@ void xfs_efi_release( struct xfs_efi_log_item *efip) { + ASSERT(atomic_read(&efip->efi_refcount) > 0); if (atomic_dec_and_test(&efip->efi_refcount)) { xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); xfs_efi_item_free(efip); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index bbb9eb6811b2..c4893e226fd8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -140,7 +140,7 @@ xfs_file_fsync( trace_xfs_file_fsync(ip); - error = filemap_write_and_wait_range(inode->i_mapping, start, end); + error = file_write_and_wait_range(file, start, end); if (error) return error; @@ -237,7 +237,11 @@ xfs_file_dax_read( if (!count) return 0; /* skip atime */ - xfs_ilock(ip, XFS_IOLOCK_SHARED); + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -527,12 +531,25 @@ xfs_file_dio_aio_write( if ((iocb->ki_pos & mp->m_blockmask) || ((iocb->ki_pos + count) & mp->m_blockmask)) { unaligned_io = 1; + + /* + * We can't properly handle unaligned direct I/O to reflink + * files yet, as we can't unshare a partial block. + */ + if (xfs_is_reflink_inode(ip)) { + trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count); + return -EREMCHG; + } iolock = XFS_IOLOCK_EXCL; } else { iolock = XFS_IOLOCK_SHARED; } - xfs_ilock(ip, iolock); + if (!xfs_ilock_nowait(ip, iolock)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, iolock); + } ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) @@ -544,22 +561,20 @@ xfs_file_dio_aio_write( * otherwise demote the lock if we had to take the exclusive lock * for other reasons in xfs_file_aio_write_checks. */ - if (unaligned_io) - inode_dio_wait(inode); - else if (iolock == XFS_IOLOCK_EXCL) { + if (unaligned_io) { + /* If we are going to wait for other DIO to finish, bail */ + if (iocb->ki_flags & IOCB_NOWAIT) { + if (atomic_read(&inode->i_dio_count)) + return -EAGAIN; + } else { + inode_dio_wait(inode); + } + } else if (iolock == XFS_IOLOCK_EXCL) { xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); iolock = XFS_IOLOCK_SHARED; } trace_xfs_file_direct_write(ip, count, iocb->ki_pos); - - /* If this is a block-aligned directio CoW, remap immediately. */ - if (xfs_is_reflink_inode(ip) && !unaligned_io) { - ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count); - if (ret) - goto out; - } - ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); out: xfs_iunlock(ip, iolock); @@ -584,7 +599,12 @@ xfs_file_dax_write( size_t count; loff_t pos; - xfs_ilock(ip, iolock); + if (!xfs_ilock_nowait(ip, iolock)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, iolock); + } + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; @@ -614,8 +634,10 @@ xfs_file_buffered_aio_write( struct xfs_inode *ip = XFS_I(inode); ssize_t ret; int enospc = 0; - int iolock = XFS_IOLOCK_EXCL; + int iolock; +write_retry: + iolock = XFS_IOLOCK_EXCL; xfs_ilock(ip, iolock); ret = xfs_file_aio_write_checks(iocb, from, &iolock); @@ -625,7 +647,6 @@ xfs_file_buffered_aio_write( /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); -write_retry: trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); if (likely(ret >= 0)) @@ -641,26 +662,31 @@ write_retry: * running at the same time. */ if (ret == -EDQUOT && !enospc) { + xfs_iunlock(ip, iolock); enospc = xfs_inode_free_quota_eofblocks(ip); if (enospc) goto write_retry; enospc = xfs_inode_free_quota_cowblocks(ip); if (enospc) goto write_retry; + iolock = 0; } else if (ret == -ENOSPC && !enospc) { struct xfs_eofblocks eofb = {0}; enospc = 1; xfs_flush_inodes(ip->i_mount); - eofb.eof_scan_owner = ip->i_ino; /* for locking */ + + xfs_iunlock(ip, iolock); eofb.eof_flags = XFS_EOF_FLAGS_SYNC; xfs_icache_free_eofblocks(ip->i_mount, &eofb); + xfs_icache_free_cowblocks(ip->i_mount, &eofb); goto write_retry; } current->backing_dev_info = NULL; out: - xfs_iunlock(ip, iolock); + if (iolock) + xfs_iunlock(ip, iolock); return ret; } @@ -748,7 +774,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; if (offset & blksize_mask || len & blksize_mask) { error = -EINVAL; @@ -770,7 +796,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_INSERT_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; new_size = i_size_read(inode) + len; if (offset & blksize_mask || len & blksize_mask) { @@ -886,6 +912,7 @@ xfs_file_open( return -EFBIG; if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) return -EIO; + file->f_mode |= FMODE_AIO_NOWAIT; return 0; } @@ -908,9 +935,9 @@ xfs_dir_open( */ mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - xfs_dir3_data_readahead(ip, 0, -1); + error = xfs_dir3_data_readahead(ip, 0, -1); xfs_iunlock(ip, mode); - return 0; + return error; } STATIC int @@ -944,395 +971,7 @@ xfs_file_readdir( */ bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); - return xfs_readdir(ip, ctx, bufsize); -} - -/* - * This type is designed to indicate the type of offset we would like - * to search from page cache for xfs_seek_hole_data(). - */ -enum { - HOLE_OFF = 0, - DATA_OFF, -}; - -/* - * Lookup the desired type of offset from the given page. - * - * On success, return true and the offset argument will point to the - * start of the region that was found. Otherwise this function will - * return false and keep the offset argument unchanged. - */ -STATIC bool -xfs_lookup_buffer_offset( - struct page *page, - loff_t *offset, - unsigned int type) -{ - loff_t lastoff = page_offset(page); - bool found = false; - struct buffer_head *bh, *head; - - bh = head = page_buffers(page); - do { - /* - * Unwritten extents that have data in the page - * cache covering them can be identified by the - * BH_Unwritten state flag. Pages with multiple - * buffers might have a mix of holes, data and - * unwritten extents - any buffer with valid - * data in it should have BH_Uptodate flag set - * on it. - */ - if (buffer_unwritten(bh) || - buffer_uptodate(bh)) { - if (type == DATA_OFF) - found = true; - } else { - if (type == HOLE_OFF) - found = true; - } - - if (found) { - *offset = lastoff; - break; - } - lastoff += bh->b_size; - } while ((bh = bh->b_this_page) != head); - - return found; -} - -/* - * This routine is called to find out and return a data or hole offset - * from the page cache for unwritten extents according to the desired - * type for xfs_seek_hole_data(). - * - * The argument offset is used to tell where we start to search from the - * page cache. Map is used to figure out the end points of the range to - * lookup pages. - * - * Return true if the desired type of offset was found, and the argument - * offset is filled with that address. Otherwise, return false and keep - * offset unchanged. - */ -STATIC bool -xfs_find_get_desired_pgoff( - struct inode *inode, - struct xfs_bmbt_irec *map, - unsigned int type, - loff_t *offset) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - struct pagevec pvec; - pgoff_t index; - pgoff_t end; - loff_t endoff; - loff_t startoff = *offset; - loff_t lastoff = startoff; - bool found = false; - - pagevec_init(&pvec, 0); - - index = startoff >> PAGE_SHIFT; - endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); - end = endoff >> PAGE_SHIFT; - do { - int want; - unsigned nr_pages; - unsigned int i; - - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); - nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, - want); - /* - * No page mapped into given range. If we are searching holes - * and if this is the first time we got into the loop, it means - * that the given offset is landed in a hole, return it. - * - * If we have already stepped through some block buffers to find - * holes but they all contains data. In this case, the last - * offset is already updated and pointed to the end of the last - * mapped page, if it does not reach the endpoint to search, - * that means there should be a hole between them. - */ - if (nr_pages == 0) { - /* Data search found nothing */ - if (type == DATA_OFF) - break; - - ASSERT(type == HOLE_OFF); - if (lastoff == startoff || lastoff < endoff) { - found = true; - *offset = lastoff; - } - break; - } - - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; - break; - } - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - loff_t b_offset; - - /* - * At this point, the page may be truncated or - * invalidated (changing page->mapping to NULL), - * or even swizzled back from swapper_space to tmpfs - * file mapping. However, page->index will not change - * because we have a reference on the page. - * - * Searching done if the page index is out of range. - * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. - */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } - goto out; - } - - lock_page(page); - /* - * Page truncated or invalidated(page->mapping == NULL). - * We can freely skip it and proceed to check the next - * page. - */ - if (unlikely(page->mapping != inode->i_mapping)) { - unlock_page(page); - continue; - } - - if (!page_has_buffers(page)) { - unlock_page(page); - continue; - } - - found = xfs_lookup_buffer_offset(page, &b_offset, type); - if (found) { - /* - * The found offset may be less than the start - * point to search if this is the first time to - * come here. - */ - *offset = max_t(loff_t, startoff, b_offset); - unlock_page(page); - goto out; - } - - /* - * We either searching data but nothing was found, or - * searching hole but found a data buffer. In either - * case, probably the next page contains the desired - * things, update the last offset to it so. - */ - lastoff = page_offset(page) + PAGE_SIZE; - unlock_page(page); - } - - /* - * The number of returned pages less than our desired, search - * done. In this case, nothing was found for searching data, - * but we found a hole behind the last offset. - */ - if (nr_pages < want) { - if (type == HOLE_OFF) { - *offset = lastoff; - found = true; - } - break; - } - - index = pvec.pages[i - 1]->index + 1; - pagevec_release(&pvec); - } while (index <= end); - -out: - pagevec_release(&pvec); - return found; -} - -/* - * caller must lock inode with xfs_ilock_data_map_shared, - * can we craft an appropriate ASSERT? - * - * end is because the VFS-level lseek interface is defined such that any - * offset past i_size shall return -ENXIO, but we use this for quota code - * which does not maintain i_size, and we want to SEEK_DATA past i_size. - */ -loff_t -__xfs_seek_hole_data( - struct inode *inode, - loff_t start, - loff_t end, - int whence) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - loff_t uninitialized_var(offset); - xfs_fileoff_t fsbno; - xfs_filblks_t lastbno; - int error; - - if (start >= end) { - error = -ENXIO; - goto out_error; - } - - /* - * Try to read extents from the first block indicated - * by fsbno to the end block of the file. - */ - fsbno = XFS_B_TO_FSBT(mp, start); - lastbno = XFS_B_TO_FSB(mp, end); - - for (;;) { - struct xfs_bmbt_irec map[2]; - int nmap = 2; - unsigned int i; - - error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap, - XFS_BMAPI_ENTIRE); - if (error) - goto out_error; - - /* No extents at given offset, must be beyond EOF */ - if (nmap == 0) { - error = -ENXIO; - goto out_error; - } - - for (i = 0; i < nmap; i++) { - offset = max_t(loff_t, start, - XFS_FSB_TO_B(mp, map[i].br_startoff)); - - /* Landed in the hole we wanted? */ - if (whence == SEEK_HOLE && - map[i].br_startblock == HOLESTARTBLOCK) - goto out; - - /* Landed in the data extent we wanted? */ - if (whence == SEEK_DATA && - (map[i].br_startblock == DELAYSTARTBLOCK || - (map[i].br_state == XFS_EXT_NORM && - !isnullstartblock(map[i].br_startblock)))) - goto out; - - /* - * Landed in an unwritten extent, try to search - * for hole or data from page cache. - */ - if (map[i].br_state == XFS_EXT_UNWRITTEN) { - if (xfs_find_get_desired_pgoff(inode, &map[i], - whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF, - &offset)) - goto out; - } - } - - /* - * We only received one extent out of the two requested. This - * means we've hit EOF and didn't find what we are looking for. - */ - if (nmap == 1) { - /* - * If we were looking for a hole, set offset to - * the end of the file (i.e., there is an implicit - * hole at the end of any file). - */ - if (whence == SEEK_HOLE) { - offset = end; - break; - } - /* - * If we were looking for data, it's nowhere to be found - */ - ASSERT(whence == SEEK_DATA); - error = -ENXIO; - goto out_error; - } - - ASSERT(i > 1); - - /* - * Nothing was found, proceed to the next round of search - * if the next reading offset is not at or beyond EOF. - */ - fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; - start = XFS_FSB_TO_B(mp, fsbno); - if (start >= end) { - if (whence == SEEK_HOLE) { - offset = end; - break; - } - ASSERT(whence == SEEK_DATA); - error = -ENXIO; - goto out_error; - } - } - -out: - /* - * If at this point we have found the hole we wanted, the returned - * offset may be bigger than the file size as it may be aligned to - * page boundary for unwritten extents. We need to deal with this - * situation in particular. - */ - if (whence == SEEK_HOLE) - offset = min_t(loff_t, offset, end); - - return offset; - -out_error: - return error; -} - -STATIC loff_t -xfs_seek_hole_data( - struct file *file, - loff_t start, - int whence) -{ - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - uint lock; - loff_t offset, end; - int error = 0; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - lock = xfs_ilock_data_map_shared(ip); - - end = i_size_read(inode); - offset = __xfs_seek_hole_data(inode, start, end, whence); - if (offset < 0) { - error = offset; - goto out_unlock; - } - - offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); - -out_unlock: - xfs_iunlock(ip, lock); - - if (error) - return error; - return offset; + return xfs_readdir(NULL, ip, ctx, bufsize); } STATIC loff_t @@ -1341,17 +980,25 @@ xfs_file_llseek( loff_t offset, int whence) { + struct inode *inode = file->f_mapping->host; + + if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount)) + return -EIO; + switch (whence) { - case SEEK_END: - case SEEK_CUR: - case SEEK_SET: + default: return generic_file_llseek(file, offset, whence); case SEEK_HOLE: + offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops); + break; case SEEK_DATA: - return xfs_seek_hole_data(file, offset, whence); - default: - return -EINVAL; + offset = iomap_seek_data(inode, offset, &xfs_iomap_ops); + break; } + + if (offset < 0) + return offset; + return vfs_setpos(file, offset, inode->i_sb->s_maxbytes); } /* @@ -1373,22 +1020,21 @@ xfs_file_llseek( */ STATIC int xfs_filemap_page_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_page_mkwrite(XFS_I(inode)); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); } else { - ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); + ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); } @@ -1400,23 +1046,22 @@ xfs_filemap_page_mkwrite( STATIC int xfs_filemap_fault( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_fault(XFS_I(inode)); /* DAX can shortcut the normal fault path on write faults! */ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) - return xfs_filemap_page_mkwrite(vma, vmf); + return xfs_filemap_page_mkwrite(vmf); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); else - ret = filemap_fault(vma, vmf); + ret = filemap_fault(vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); return ret; @@ -1425,36 +1070,34 @@ xfs_filemap_fault( /* * Similar to xfs_filemap_fault(), the DAX fault path can call into here on * both read and write faults. Hence we need to handle both cases. There is no - * ->pmd_mkwrite callout for huge pages, so we have a single function here to + * ->huge_mkwrite callout for huge pages, so we have a single function here to * handle both cases here. @flags carries the information on the type of fault * occuring. */ STATIC int -xfs_filemap_pmd_fault( - struct vm_area_struct *vma, - unsigned long addr, - pmd_t *pmd, - unsigned int flags) +xfs_filemap_huge_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); int ret; if (!IS_DAX(inode)) return VM_FAULT_FALLBACK; - trace_xfs_filemap_pmd_fault(ip); + trace_xfs_filemap_huge_fault(ip); - if (flags & FAULT_FLAG_WRITE) { + if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - if (flags & FAULT_FLAG_WRITE) + if (vmf->flags & FAULT_FLAG_WRITE) sb_end_pagefault(inode->i_sb); return ret; @@ -1468,11 +1111,10 @@ xfs_filemap_pmd_fault( */ static int xfs_filemap_pfn_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); int ret = VM_FAULT_NOPAGE; loff_t size; @@ -1480,7 +1122,7 @@ xfs_filemap_pfn_mkwrite( trace_xfs_filemap_pfn_mkwrite(ip); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); /* check if the faulting page hasn't raced with truncate */ xfs_ilock(ip, XFS_MMAPLOCK_SHARED); @@ -1488,7 +1130,7 @@ xfs_filemap_pfn_mkwrite( if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else if (IS_DAX(inode)) - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); sb_end_pagefault(inode->i_sb); return ret; @@ -1497,7 +1139,7 @@ xfs_filemap_pfn_mkwrite( static const struct vm_operations_struct xfs_file_vm_ops = { .fault = xfs_filemap_fault, - .pmd_fault = xfs_filemap_pmd_fault, + .huge_fault = xfs_filemap_huge_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_filemap_page_mkwrite, .pfn_mkwrite = xfs_filemap_pfn_mkwrite, diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c new file mode 100644 index 000000000000..814ed729881d --- /dev/null +++ b/fs/xfs/xfs_fsmap.c @@ -0,0 +1,943 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_error.h" +#include "xfs_btree.h" +#include "xfs_rmap_btree.h" +#include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_rmap.h" +#include "xfs_alloc.h" +#include "xfs_bit.h" +#include <linux/fsmap.h> +#include "xfs_fsmap.h" +#include "xfs_refcount.h" +#include "xfs_refcount_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_rtalloc.h" + +/* Convert an xfs_fsmap to an fsmap. */ +void +xfs_fsmap_from_internal( + struct fsmap *dest, + struct xfs_fsmap *src) +{ + dest->fmr_device = src->fmr_device; + dest->fmr_flags = src->fmr_flags; + dest->fmr_physical = BBTOB(src->fmr_physical); + dest->fmr_owner = src->fmr_owner; + dest->fmr_offset = BBTOB(src->fmr_offset); + dest->fmr_length = BBTOB(src->fmr_length); + dest->fmr_reserved[0] = 0; + dest->fmr_reserved[1] = 0; + dest->fmr_reserved[2] = 0; +} + +/* Convert an fsmap to an xfs_fsmap. */ +void +xfs_fsmap_to_internal( + struct xfs_fsmap *dest, + struct fsmap *src) +{ + dest->fmr_device = src->fmr_device; + dest->fmr_flags = src->fmr_flags; + dest->fmr_physical = BTOBBT(src->fmr_physical); + dest->fmr_owner = src->fmr_owner; + dest->fmr_offset = BTOBBT(src->fmr_offset); + dest->fmr_length = BTOBBT(src->fmr_length); +} + +/* Convert an fsmap owner into an rmapbt owner. */ +static int +xfs_fsmap_owner_to_rmap( + struct xfs_rmap_irec *dest, + struct xfs_fsmap *src) +{ + if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) { + dest->rm_owner = src->fmr_owner; + return 0; + } + + switch (src->fmr_owner) { + case 0: /* "lowest owner id possible" */ + case -1ULL: /* "highest owner id possible" */ + dest->rm_owner = 0; + break; + case XFS_FMR_OWN_FREE: + dest->rm_owner = XFS_RMAP_OWN_NULL; + break; + case XFS_FMR_OWN_UNKNOWN: + dest->rm_owner = XFS_RMAP_OWN_UNKNOWN; + break; + case XFS_FMR_OWN_FS: + dest->rm_owner = XFS_RMAP_OWN_FS; + break; + case XFS_FMR_OWN_LOG: + dest->rm_owner = XFS_RMAP_OWN_LOG; + break; + case XFS_FMR_OWN_AG: + dest->rm_owner = XFS_RMAP_OWN_AG; + break; + case XFS_FMR_OWN_INOBT: + dest->rm_owner = XFS_RMAP_OWN_INOBT; + break; + case XFS_FMR_OWN_INODES: + dest->rm_owner = XFS_RMAP_OWN_INODES; + break; + case XFS_FMR_OWN_REFC: + dest->rm_owner = XFS_RMAP_OWN_REFC; + break; + case XFS_FMR_OWN_COW: + dest->rm_owner = XFS_RMAP_OWN_COW; + break; + case XFS_FMR_OWN_DEFECTIVE: /* not implemented */ + /* fall through */ + default: + return -EINVAL; + } + return 0; +} + +/* Convert an rmapbt owner into an fsmap owner. */ +static int +xfs_fsmap_owner_from_rmap( + struct xfs_fsmap *dest, + struct xfs_rmap_irec *src) +{ + dest->fmr_flags = 0; + if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) { + dest->fmr_owner = src->rm_owner; + return 0; + } + dest->fmr_flags |= FMR_OF_SPECIAL_OWNER; + + switch (src->rm_owner) { + case XFS_RMAP_OWN_FS: + dest->fmr_owner = XFS_FMR_OWN_FS; + break; + case XFS_RMAP_OWN_LOG: + dest->fmr_owner = XFS_FMR_OWN_LOG; + break; + case XFS_RMAP_OWN_AG: + dest->fmr_owner = XFS_FMR_OWN_AG; + break; + case XFS_RMAP_OWN_INOBT: + dest->fmr_owner = XFS_FMR_OWN_INOBT; + break; + case XFS_RMAP_OWN_INODES: + dest->fmr_owner = XFS_FMR_OWN_INODES; + break; + case XFS_RMAP_OWN_REFC: + dest->fmr_owner = XFS_FMR_OWN_REFC; + break; + case XFS_RMAP_OWN_COW: + dest->fmr_owner = XFS_FMR_OWN_COW; + break; + case XFS_RMAP_OWN_NULL: /* "free" */ + dest->fmr_owner = XFS_FMR_OWN_FREE; + break; + default: + return -EFSCORRUPTED; + } + return 0; +} + +/* getfsmap query state */ +struct xfs_getfsmap_info { + struct xfs_fsmap_head *head; + xfs_fsmap_format_t formatter; /* formatting fn */ + void *format_arg; /* format buffer */ + struct xfs_buf *agf_bp; /* AGF, for refcount queries */ + xfs_daddr_t next_daddr; /* next daddr we expect */ + u64 missing_owner; /* owner of holes */ + u32 dev; /* device id */ + xfs_agnumber_t agno; /* AG number, if applicable */ + struct xfs_rmap_irec low; /* low rmap key */ + struct xfs_rmap_irec high; /* high rmap key */ + bool last; /* last extent? */ +}; + +/* Associate a device with a getfsmap handler. */ +struct xfs_getfsmap_dev { + u32 dev; + int (*fn)(struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info); +}; + +/* Compare two getfsmap device handlers. */ +static int +xfs_getfsmap_dev_compare( + const void *p1, + const void *p2) +{ + const struct xfs_getfsmap_dev *d1 = p1; + const struct xfs_getfsmap_dev *d2 = p2; + + return d1->dev - d2->dev; +} + +/* Decide if this mapping is shared. */ +STATIC int +xfs_getfsmap_is_shared( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_rmap_irec *rec, + bool *stat) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_btree_cur *cur; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int error; + + *stat = false; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + /* rt files will have agno set to NULLAGNUMBER */ + if (info->agno == NULLAGNUMBER) + return 0; + + /* Are there any shared blocks here? */ + flen = 0; + cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, + info->agno, NULL); + + error = xfs_refcount_find_shared(cur, rec->rm_startblock, + rec->rm_blockcount, &fbno, &flen, false); + + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + if (error) + return error; + + *stat = flen > 0; + return 0; +} + +/* + * Format a reverse mapping for getfsmap, having translated rm_startblock + * into the appropriate daddr units. + */ +STATIC int +xfs_getfsmap_helper( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_rmap_irec *rec, + xfs_daddr_t rec_daddr) +{ + struct xfs_fsmap fmr; + struct xfs_mount *mp = tp->t_mountp; + bool shared; + int error; + + if (fatal_signal_pending(current)) + return -EINTR; + + /* + * Filter out records that start before our startpoint, if the + * caller requested that. + */ + if (xfs_rmap_compare(rec, &info->low) < 0) { + rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return XFS_BTREE_QUERY_RANGE_CONTINUE; + } + + /* Are we just counting mappings? */ + if (info->head->fmh_count == 0) { + if (rec_daddr > info->next_daddr) + info->head->fmh_entries++; + + if (info->last) + return XFS_BTREE_QUERY_RANGE_CONTINUE; + + info->head->fmh_entries++; + + rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return XFS_BTREE_QUERY_RANGE_CONTINUE; + } + + /* + * If the record starts past the last physical block we saw, + * then we've found a gap. Report the gap as being owned by + * whatever the caller specified is the missing owner. + */ + if (rec_daddr > info->next_daddr) { + if (info->head->fmh_entries >= info->head->fmh_count) + return XFS_BTREE_QUERY_RANGE_ABORT; + + fmr.fmr_device = info->dev; + fmr.fmr_physical = info->next_daddr; + fmr.fmr_owner = info->missing_owner; + fmr.fmr_offset = 0; + fmr.fmr_length = rec_daddr - info->next_daddr; + fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; + error = info->formatter(&fmr, info->format_arg); + if (error) + return error; + info->head->fmh_entries++; + } + + if (info->last) + goto out; + + /* Fill out the extent we found */ + if (info->head->fmh_entries >= info->head->fmh_count) + return XFS_BTREE_QUERY_RANGE_ABORT; + + trace_xfs_fsmap_mapping(mp, info->dev, info->agno, rec); + + fmr.fmr_device = info->dev; + fmr.fmr_physical = rec_daddr; + error = xfs_fsmap_owner_from_rmap(&fmr, rec); + if (error) + return error; + fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset); + fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (rec->rm_flags & XFS_RMAP_UNWRITTEN) + fmr.fmr_flags |= FMR_OF_PREALLOC; + if (rec->rm_flags & XFS_RMAP_ATTR_FORK) + fmr.fmr_flags |= FMR_OF_ATTR_FORK; + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) + fmr.fmr_flags |= FMR_OF_EXTENT_MAP; + if (fmr.fmr_flags == 0) { + error = xfs_getfsmap_is_shared(tp, info, rec, &shared); + if (error) + return error; + if (shared) + fmr.fmr_flags |= FMR_OF_SHARED; + } + error = info->formatter(&fmr, info->format_arg); + if (error) + return error; + info->head->fmh_entries++; + +out: + rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return XFS_BTREE_QUERY_RANGE_CONTINUE; +} + +/* Transform a rmapbt irec into a fsmap */ +STATIC int +xfs_getfsmap_datadev_helper( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_getfsmap_info *info = priv; + xfs_fsblock_t fsb; + xfs_daddr_t rec_daddr; + + fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->rm_startblock); + rec_daddr = XFS_FSB_TO_DADDR(mp, fsb); + + return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); +} + +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); +} + +/* Transform a bnobt irec into a fsmap */ +STATIC int +xfs_getfsmap_datadev_bnobt_helper( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno, + rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr); +} + +/* Set rmap flags based on the getfsmap flags */ +static void +xfs_getfsmap_set_irec_flags( + struct xfs_rmap_irec *irec, + struct xfs_fsmap *fmr) +{ + irec->rm_flags = 0; + if (fmr->fmr_flags & FMR_OF_ATTR_FORK) + irec->rm_flags |= XFS_RMAP_ATTR_FORK; + if (fmr->fmr_flags & FMR_OF_EXTENT_MAP) + irec->rm_flags |= XFS_RMAP_BMBT_BLOCK; + if (fmr->fmr_flags & FMR_OF_PREALLOC) + irec->rm_flags |= XFS_RMAP_UNWRITTEN; +} + +/* Execute a getfsmap query against the log device. */ +STATIC int +xfs_getfsmap_logdev( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rmap_irec rmap; + int error; + + /* Set up search keys */ + info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, keys); + if (error) + return error; + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + + error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1); + if (error) + return error; + info->high.rm_startblock = -1U; + info->high.rm_owner = ULLONG_MAX; + info->high.rm_offset = ULLONG_MAX; + info->high.rm_blockcount = 0; + info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; + info->missing_owner = XFS_FMR_OWN_FREE; + + trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high); + + if (keys[0].fmr_physical > 0) + return 0; + + /* Fabricate an rmap entry for the external log device. */ + rmap.rm_startblock = 0; + rmap.rm_blockcount = mp->m_sb.sb_logblocks; + rmap.rm_owner = XFS_RMAP_OWN_LOG; + rmap.rm_offset = 0; + rmap.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &rmap, 0); +} + +/* Execute a getfsmap query against the realtime device. */ +STATIC int +__xfs_getfsmap_rtdev( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + int (*query_fn)(struct xfs_trans *, + struct xfs_getfsmap_info *), + struct xfs_getfsmap_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + xfs_fsblock_t start_fsb; + xfs_fsblock_t end_fsb; + xfs_daddr_t eofs; + int error = 0; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); + if (keys[0].fmr_physical >= eofs) + return 0; + if (keys[1].fmr_physical >= eofs) + keys[1].fmr_physical = eofs - 1; + start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); + end_fsb = XFS_BB_TO_FSB(mp, keys[1].fmr_physical); + + /* Set up search keys */ + info->low.rm_startblock = start_fsb; + error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + if (error) + return error; + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + + info->high.rm_startblock = end_fsb; + error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); + if (error) + return error; + info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset); + info->high.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); + + trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high); + + return query_fn(tp, info); +} + +/* Actually query the realtime bitmap. */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_query( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info) +{ + struct xfs_rtalloc_rec alow; + struct xfs_rtalloc_rec ahigh; + int error; + + xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); + + alow.ar_startblock = info->low.rm_startblock; + ahigh.ar_startblock = info->high.rm_startblock; + error = xfs_rtalloc_query_range(tp, &alow, &ahigh, + xfs_getfsmap_rtdev_rtbitmap_helper, info); + if (error) + goto err; + + /* Report any gaps at the end of the rtbitmap */ + info->last = true; + error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, &ahigh, info); + if (error) + goto err; +err: + xfs_iunlock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); + return error; +} + +/* Execute a getfsmap query against the realtime device rtbitmap. */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + info->missing_owner = XFS_FMR_OWN_UNKNOWN; + return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, + info); +} + +/* Execute a getfsmap query against the regular data device. */ +STATIC int +__xfs_getfsmap_datadev( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info, + int (*query_fn)(struct xfs_trans *, + struct xfs_getfsmap_info *, + struct xfs_btree_cur **, + void *), + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_btree_cur *bt_cur = NULL; + xfs_fsblock_t start_fsb; + xfs_fsblock_t end_fsb; + xfs_agnumber_t start_ag; + xfs_agnumber_t end_ag; + xfs_daddr_t eofs; + int error = 0; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); + if (keys[0].fmr_physical >= eofs) + return 0; + if (keys[1].fmr_physical >= eofs) + keys[1].fmr_physical = eofs - 1; + start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical); + end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical); + + /* + * Convert the fsmap low/high keys to AG based keys. Initialize + * low to the fsmap low key and max out the high key to the end + * of the AG. + */ + info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + if (error) + return error; + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + + info->high.rm_startblock = -1U; + info->high.rm_owner = ULLONG_MAX; + info->high.rm_offset = ULLONG_MAX; + info->high.rm_blockcount = 0; + info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; + + start_ag = XFS_FSB_TO_AGNO(mp, start_fsb); + end_ag = XFS_FSB_TO_AGNO(mp, end_fsb); + + /* Query each AG */ + for (info->agno = start_ag; info->agno <= end_ag; info->agno++) { + /* + * Set the AG high key from the fsmap high key if this + * is the last AG that we're querying. + */ + if (info->agno == end_ag) { + info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp, + end_fsb); + info->high.rm_offset = XFS_BB_TO_FSBT(mp, + keys[1].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); + if (error) + goto err; + xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); + } + + if (bt_cur) { + xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR); + bt_cur = NULL; + xfs_trans_brelse(tp, info->agf_bp); + info->agf_bp = NULL; + } + + error = xfs_alloc_read_agf(mp, tp, info->agno, 0, + &info->agf_bp); + if (error) + goto err; + + trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, info->agno, + &info->high); + + error = query_fn(tp, info, &bt_cur, priv); + if (error) + goto err; + + /* + * Set the AG low key to the start of the AG prior to + * moving on to the next AG. + */ + if (info->agno == start_ag) { + info->low.rm_startblock = 0; + info->low.rm_owner = 0; + info->low.rm_offset = 0; + info->low.rm_flags = 0; + } + } + + /* Report any gap at the end of the AG */ + info->last = true; + error = query_fn(tp, info, &bt_cur, priv); + if (error) + goto err; + +err: + if (bt_cur) + xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR : + XFS_BTREE_NOERROR); + if (info->agf_bp) { + xfs_trans_brelse(tp, info->agf_bp); + info->agf_bp = NULL; + } + + return error; +} + +/* Actually query the rmap btree. */ +STATIC int +xfs_getfsmap_datadev_rmapbt_query( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_btree_cur **curpp, + void *priv) +{ + /* Report any gap at the end of the last AG. */ + if (info->last) + return xfs_getfsmap_datadev_helper(*curpp, &info->high, info); + + /* Allocate cursor for this AG and query_range it. */ + *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp, + info->agno); + return xfs_rmap_query_range(*curpp, &info->low, &info->high, + xfs_getfsmap_datadev_helper, info); +} + +/* Execute a getfsmap query against the regular data device rmapbt. */ +STATIC int +xfs_getfsmap_datadev_rmapbt( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + info->missing_owner = XFS_FMR_OWN_FREE; + return __xfs_getfsmap_datadev(tp, keys, info, + xfs_getfsmap_datadev_rmapbt_query, NULL); +} + +/* Actually query the bno btree. */ +STATIC int +xfs_getfsmap_datadev_bnobt_query( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_btree_cur **curpp, + void *priv) +{ + struct xfs_alloc_rec_incore *key = priv; + + /* Report any gap at the end of the last AG. */ + if (info->last) + return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info); + + /* Allocate cursor for this AG and query_range it. */ + *curpp = xfs_allocbt_init_cursor(tp->t_mountp, tp, info->agf_bp, + info->agno, XFS_BTNUM_BNO); + key->ar_startblock = info->low.rm_startblock; + key[1].ar_startblock = info->high.rm_startblock; + return xfs_alloc_query_range(*curpp, key, &key[1], + xfs_getfsmap_datadev_bnobt_helper, info); +} + +/* Execute a getfsmap query against the regular data device's bnobt. */ +STATIC int +xfs_getfsmap_datadev_bnobt( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + struct xfs_alloc_rec_incore akeys[2]; + + info->missing_owner = XFS_FMR_OWN_UNKNOWN; + return __xfs_getfsmap_datadev(tp, keys, info, + xfs_getfsmap_datadev_bnobt_query, &akeys[0]); +} + +/* Do we recognize the device? */ +STATIC bool +xfs_getfsmap_is_valid_device( + struct xfs_mount *mp, + struct xfs_fsmap *fm) +{ + if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || + fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev)) + return true; + if (mp->m_logdev_targp && + fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev)) + return true; + if (mp->m_rtdev_targp && + fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev)) + return true; + return false; +} + +/* Ensure that the low key is less than the high key. */ +STATIC bool +xfs_getfsmap_check_keys( + struct xfs_fsmap *low_key, + struct xfs_fsmap *high_key) +{ + if (low_key->fmr_device > high_key->fmr_device) + return false; + if (low_key->fmr_device < high_key->fmr_device) + return true; + + if (low_key->fmr_physical > high_key->fmr_physical) + return false; + if (low_key->fmr_physical < high_key->fmr_physical) + return true; + + if (low_key->fmr_owner > high_key->fmr_owner) + return false; + if (low_key->fmr_owner < high_key->fmr_owner) + return true; + + if (low_key->fmr_offset > high_key->fmr_offset) + return false; + if (low_key->fmr_offset < high_key->fmr_offset) + return true; + + return false; +} + +#define XFS_GETFSMAP_DEVS 3 +/* + * Get filesystem's extents as described in head, and format for + * output. Calls formatter to fill the user's buffer until all + * extents are mapped, until the passed-in head->fmh_count slots have + * been filled, or until the formatter short-circuits the loop, if it + * is tracking filled-in extents on its own. + * + * Key to Confusion + * ---------------- + * There are multiple levels of keys and counters at work here: + * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in; + * these reflect fs-wide sector addrs. + * dkeys -- fmh_keys used to query each device; + * these are fmh_keys but w/ the low key + * bumped up by fmr_length. + * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this + * is how we detect gaps in the fsmap + records and report them. + * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from + * dkeys; used to query the metadata. + */ +int +xfs_getfsmap( + struct xfs_mount *mp, + struct xfs_fsmap_head *head, + xfs_fsmap_format_t formatter, + void *arg) +{ + struct xfs_trans *tp = NULL; + struct xfs_fsmap dkeys[2]; /* per-dev keys */ + struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; + struct xfs_getfsmap_info info = { NULL }; + bool use_rmap; + int i; + int error = 0; + + if (head->fmh_iflags & ~FMH_IF_VALID) + return -EINVAL; + if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) || + !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) + return -EINVAL; + + use_rmap = capable(CAP_SYS_ADMIN) && + xfs_sb_version_hasrmapbt(&mp->m_sb); + head->fmh_entries = 0; + + /* Set up our device handlers. */ + memset(handlers, 0, sizeof(handlers)); + handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); + if (use_rmap) + handlers[0].fn = xfs_getfsmap_datadev_rmapbt; + else + handlers[0].fn = xfs_getfsmap_datadev_bnobt; + if (mp->m_logdev_targp != mp->m_ddev_targp) { + handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); + handlers[1].fn = xfs_getfsmap_logdev; + } + if (mp->m_rtdev_targp) { + handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); + handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; + } + + xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), + xfs_getfsmap_dev_compare); + + /* + * To continue where we left off, we allow userspace to use the + * last mapping from a previous call as the low key of the next. + * This is identified by a non-zero length in the low key. We + * have to increment the low key in this scenario to ensure we + * don't return the same mapping again, and instead return the + * very next mapping. + * + * If the low key mapping refers to file data, the same physical + * blocks could be mapped to several other files/offsets. + * According to rmapbt record ordering, the minimal next + * possible record for the block range is the next starting + * offset in the same inode. Therefore, bump the file offset to + * continue the search appropriately. For all other low key + * mapping types (attr blocks, metadata), bump the physical + * offset as there can be no other mapping for the same physical + * block range. + */ + dkeys[0] = head->fmh_keys[0]; + if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { + dkeys[0].fmr_physical += dkeys[0].fmr_length; + dkeys[0].fmr_owner = 0; + if (dkeys[0].fmr_offset) + return -EINVAL; + } else + dkeys[0].fmr_offset += dkeys[0].fmr_length; + dkeys[0].fmr_length = 0; + memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); + + if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) + return -EINVAL; + + info.next_daddr = head->fmh_keys[0].fmr_physical + + head->fmh_keys[0].fmr_length; + info.formatter = formatter; + info.format_arg = arg; + info.head = head; + + /* For each device we support... */ + for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { + /* Is this device within the range the user asked for? */ + if (!handlers[i].fn) + continue; + if (head->fmh_keys[0].fmr_device > handlers[i].dev) + continue; + if (head->fmh_keys[1].fmr_device < handlers[i].dev) + break; + + /* + * If this device number matches the high key, we have + * to pass the high key to the handler to limit the + * query results. If the device number exceeds the + * low key, zero out the low key so that we get + * everything from the beginning. + */ + if (handlers[i].dev == head->fmh_keys[1].fmr_device) + dkeys[1] = head->fmh_keys[1]; + if (handlers[i].dev > head->fmh_keys[0].fmr_device) + memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); + + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + break; + + info.dev = handlers[i].dev; + info.last = false; + info.agno = NULLAGNUMBER; + error = handlers[i].fn(tp, dkeys, &info); + if (error) + break; + xfs_trans_cancel(tp); + tp = NULL; + info.next_daddr = 0; + } + + if (tp) + xfs_trans_cancel(tp); + head->fmh_oflags = FMH_OF_DEV_T; + return error; +} diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h new file mode 100644 index 000000000000..0b9bf822595c --- /dev/null +++ b/fs/xfs/xfs_fsmap.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_FSMAP_H__ +#define __XFS_FSMAP_H__ + +struct fsmap; + +/* internal fsmap representation */ +struct xfs_fsmap { + dev_t fmr_device; /* device id */ + uint32_t fmr_flags; /* mapping flags */ + uint64_t fmr_physical; /* device offset of segment */ + uint64_t fmr_owner; /* owner id */ + xfs_fileoff_t fmr_offset; /* file offset of segment */ + xfs_filblks_t fmr_length; /* length of segment, blocks */ +}; + +struct xfs_fsmap_head { + uint32_t fmh_iflags; /* control flags */ + uint32_t fmh_oflags; /* output flags */ + unsigned int fmh_count; /* # of entries in array incl. input */ + unsigned int fmh_entries; /* # of entries filled in (output). */ + + struct xfs_fsmap fmh_keys[2]; /* low and high keys */ +}; + +void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src); +void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src); + +/* fsmap to userspace formatter - copy to user & advance pointer */ +typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *); + +int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head, + xfs_fsmap_format_t formatter, void *arg); + +#endif /* __XFS_FSMAP_H__ */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 242e8091296d..8f22fc579dbb 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -352,12 +352,7 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1, - agno, XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, - agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); @@ -381,12 +376,7 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1, - agno, XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, - agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); @@ -413,8 +403,8 @@ xfs_growfs_data_private( goto error0; } - xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0, - agno, XFS_BTREE_CRC_BLOCKS); + xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0, + agno, 0); block = XFS_BUF_TO_BLOCK(bp); @@ -488,12 +478,7 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0, - agno, XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, - agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -513,13 +498,8 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, - 0, 0, agno, - XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, - 0, agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO, + 0, 0, agno, 0); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -540,9 +520,8 @@ xfs_growfs_data_private( goto error0; } - xfs_btree_init_block(mp, bp, XFS_REFC_CRC_MAGIC, - 0, 0, agno, - XFS_BTREE_CRC_BLOCKS); + xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC, + 0, 0, agno, 0); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -623,7 +602,7 @@ xfs_growfs_data_private( if (nagimax) mp->m_maxagi = nagimax; if (mp->m_sb.sb_imax_pct) { - __uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; + uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; do_div(icount, 100); mp->m_maxicount = icount << mp->m_sb.sb_inopblog; } else @@ -814,17 +793,17 @@ xfs_fs_counts( int xfs_reserve_blocks( xfs_mount_t *mp, - __uint64_t *inval, + uint64_t *inval, xfs_fsop_resblks_t *outval) { - __int64_t lcounter, delta; - __int64_t fdblks_delta = 0; - __uint64_t request; - __int64_t free; + int64_t lcounter, delta; + int64_t fdblks_delta = 0; + uint64_t request; + int64_t free; int error = 0; /* If inval is null, report current values and return */ - if (inval == (__uint64_t *)NULL) { + if (inval == (uint64_t *)NULL) { if (!outval) return -EINVAL; outval->resblks = mp->m_resblks; @@ -925,7 +904,7 @@ out: int xfs_fs_goingdown( xfs_mount_t *mp, - __uint32_t inflags) + uint32_t inflags) { switch (inflags) { case XFS_FSOP_GOING_FLAGS_DEFAULT: { diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index f34915898fea..2954c13a3acd 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -22,9 +22,9 @@ extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion); extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in); extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in); extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); -extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, +extern int xfs_reserve_blocks(xfs_mount_t *mp, uint64_t *inval, xfs_fsop_resblks_t *outval); -extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); +extern int xfs_fs_goingdown(xfs_mount_t *mp, uint32_t inflags); extern int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp); extern int xfs_fs_unreserve_ag_blocks(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 687a4b01fc53..3e1cc3001bcb 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -47,4 +47,9 @@ xfs_param_t xfs_params = { struct xfs_globals xfs_globals = { .log_recovery_delay = 0, /* no delay by default */ +#ifdef XFS_ASSERT_FATAL + .bug_on_assert = true, /* assert failures BUG() */ +#else + .bug_on_assert = false, /* assert failures WARN() */ +#endif }; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 70ca4f608321..0a9e6985a0d0 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -66,7 +66,6 @@ xfs_inode_alloc( XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); @@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (pag->pag_ici_reclaimable++) return; @@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (--pag->pag_ici_reclaimable) return; @@ -262,6 +261,22 @@ xfs_inode_clear_reclaim_tag( xfs_perag_clear_reclaim_tag(pag); } +static void +xfs_inew_wait( + struct xfs_inode *ip) +{ + wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT); + DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); + + do { + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + if (!xfs_iflags_test(ip, XFS_INEW)) + break; + schedule(); + } while (true); + finish_wait(wq, &wait.wq_entry); +} + /* * When we recycle a reclaimable inode, we need to re-initialise the VFS inode * part of the structure. This is made more complex by the fact we store @@ -353,6 +368,11 @@ xfs_iget_cache_hit( if (ip->i_flags & XFS_IRECLAIMABLE) { trace_xfs_iget_reclaim(ip); + if (flags & XFS_IGET_INCORE) { + error = -EAGAIN; + goto out_error; + } + /* * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode * from stomping over us while we recycle the inode. We can't @@ -366,14 +386,17 @@ xfs_iget_cache_hit( error = xfs_reinit_inode(mp, inode); if (error) { + bool wake; /* * Re-initializing the inode failed, and we are in deep * trouble. Try to re-add it to the reclaim list. */ rcu_read_lock(); spin_lock(&ip->i_flags_lock); - + wake = !!__xfs_iflags_test(ip, XFS_INEW); ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + if (wake) + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; @@ -414,7 +437,8 @@ xfs_iget_cache_hit( if (lock_flags != 0) xfs_ilock(ip, lock_flags); - xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); + if (!(flags & XFS_IGET_INCORE)) + xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); XFS_STATS_INC(mp, xs_ig_found); return 0; @@ -585,6 +609,10 @@ again: goto out_error_or_again; } else { rcu_read_unlock(); + if (flags & XFS_IGET_INCORE) { + error = -ENOENT; + goto out_error_or_again; + } XFS_STATS_INC(mp, xs_ig_missed); error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, @@ -605,7 +633,7 @@ again: return 0; out_error_or_again: - if (error == -EAGAIN) { + if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) { delay(1); goto again; } @@ -614,6 +642,44 @@ out_error_or_again: } /* + * "Is this a cached inode that's also allocated?" + * + * Look up an inode by number in the given file system. If the inode is + * in cache and isn't in purgatory, return 1 if the inode is allocated + * and 0 if it is not. For all other cases (not in cache, being torn + * down, etc.), return a negative error code. + * + * The caller has to prevent inode allocation and freeing activity, + * presumably by locking the AGI buffer. This is to ensure that an + * inode cannot transition from allocated to freed until the caller is + * ready to allow that. If the inode is in an intermediate state (new, + * reclaimable, or being reclaimed), -EAGAIN will be returned; if the + * inode is not in the cache, -ENOENT will be returned. The caller must + * deal with these scenarios appropriately. + * + * This is a specialized use case for the online scrubber; if you're + * reading this, you probably want xfs_iget. + */ +int +xfs_icache_inode_is_allocated( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + bool *inuse) +{ + struct xfs_inode *ip; + int error; + + error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip); + if (error) + return error; + + *inuse = !!(VFS_I(ip)->i_mode); + IRELE(ip); + return 0; +} + +/* * The inode lookup is done in batches to keep the amount of lock traffic and * radix tree lookups to a minimum. The batch size is a trade off between * lookup reduction and stack usage. This is in the reclaim path, so we can't @@ -623,9 +689,11 @@ out_error_or_again: STATIC int xfs_inode_ag_walk_grab( - struct xfs_inode *ip) + struct xfs_inode *ip, + int flags) { struct inode *inode = VFS_I(ip); + bool newinos = !!(flags & XFS_AGITER_INEW_WAIT); ASSERT(rcu_read_lock_held()); @@ -643,7 +711,8 @@ xfs_inode_ag_walk_grab( goto out_unlock_noent; /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) || + __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)) goto out_unlock_noent; spin_unlock(&ip->i_flags_lock); @@ -671,7 +740,8 @@ xfs_inode_ag_walk( void *args), int flags, void *args, - int tag) + int tag, + int iter_flags) { uint32_t first_index; int last_error = 0; @@ -713,7 +783,7 @@ restart: for (i = 0; i < nr_found; i++) { struct xfs_inode *ip = batch[i]; - if (done || xfs_inode_ag_walk_grab(ip)) + if (done || xfs_inode_ag_walk_grab(ip, iter_flags)) batch[i] = NULL; /* @@ -741,6 +811,9 @@ restart: for (i = 0; i < nr_found; i++) { if (!batch[i]) continue; + if ((iter_flags & XFS_AGITER_INEW_WAIT) && + xfs_iflags_test(batch[i], XFS_INEW)) + xfs_inew_wait(batch[i]); error = execute(batch[i], flags, args); IRELE(batch[i]); if (error == -EAGAIN) { @@ -820,12 +893,13 @@ xfs_cowblocks_worker( } int -xfs_inode_ag_iterator( +xfs_inode_ag_iterator_flags( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, - void *args) + void *args, + int iter_flags) { struct xfs_perag *pag; int error = 0; @@ -835,7 +909,8 @@ xfs_inode_ag_iterator( ag = 0; while ((pag = xfs_perag_get(mp, ag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1, + iter_flags); xfs_perag_put(pag); if (error) { last_error = error; @@ -847,6 +922,17 @@ xfs_inode_ag_iterator( } int +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, + void *args), + int flags, + void *args) +{ + return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0); +} + +int xfs_inode_ag_iterator_tag( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, @@ -863,7 +949,8 @@ xfs_inode_ag_iterator_tag( ag = 0; while ((pag = xfs_perag_get_tag(mp, ag, tag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag, + 0); xfs_perag_put(pag); if (error) { last_error = error; @@ -1322,13 +1409,10 @@ xfs_inode_free_eofblocks( int flags, void *args) { - int ret; + int ret = 0; struct xfs_eofblocks *eofb = args; - bool need_iolock = true; int match; - ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); - if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ trace_xfs_inode_free_eofblocks_invalid(ip); @@ -1356,21 +1440,19 @@ xfs_inode_free_eofblocks( if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; - - /* - * A scan owner implies we already hold the iolock. Skip it in - * xfs_free_eofblocks() to avoid deadlock. This also eliminates - * the possibility of EAGAIN being returned. - */ - if (eofb->eof_scan_owner == ip->i_ino) - need_iolock = false; } - ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); - - /* don't revisit the inode if we're not waiting */ - if (ret == -EAGAIN && !(flags & SYNC_WAIT)) - ret = 0; + /* + * If the caller is waiting, return -EAGAIN to keep the background + * scanner moving and revisit the inode in a subsequent pass. + */ + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + if (flags & SYNC_WAIT) + ret = -EAGAIN; + return ret; + } + ret = xfs_free_eofblocks(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } @@ -1417,15 +1499,10 @@ __xfs_inode_free_quota_eofblocks( struct xfs_eofblocks eofb = {0}; struct xfs_dquot *dq; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - /* - * Set the scan owner to avoid a potential livelock. Otherwise, the scan - * can repeatedly trylock on the inode we're currently processing. We - * run a sync scan to increase effectiveness and use the union filter to + * Run a sync scan to increase effectiveness and use the union filter to * cover all applicable quotas in a single scan. */ - eofb.eof_scan_owner = ip->i_ino; eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { @@ -1577,12 +1654,9 @@ xfs_inode_free_cowblocks( { int ret; struct xfs_eofblocks *eofb = args; - bool need_iolock = true; int match; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); - /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. @@ -1615,28 +1689,16 @@ xfs_inode_free_cowblocks( if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; - - /* - * A scan owner implies we already hold the iolock. Skip it in - * xfs_free_eofblocks() to avoid deadlock. This also eliminates - * the possibility of EAGAIN being returned. - */ - if (eofb->eof_scan_owner == ip->i_ino) - need_iolock = false; } /* Free the CoW blocks */ - if (need_iolock) { - xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - } + xfs_ilock(ip, XFS_IOLOCK_EXCL); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); - if (need_iolock) { - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index a1e02f4708ab..bff4d85e5498 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -27,7 +27,6 @@ struct xfs_eofblocks { kgid_t eof_gid; prid_t eof_prid; __u64 eof_min_file_size; - xfs_ino_t eof_scan_owner; }; #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ @@ -48,6 +47,12 @@ struct xfs_eofblocks { #define XFS_IGET_CREATE 0x1 #define XFS_IGET_UNTRUSTED 0x2 #define XFS_IGET_DONTCACHE 0x4 +#define XFS_IGET_INCORE 0x8 /* don't read from disk or reinit */ + +/* + * flags for AG inode iterator + */ +#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */ int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, uint flags, uint lock_flags, xfs_inode_t **ipp); @@ -80,6 +85,9 @@ void xfs_cowblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); +int xfs_inode_ag_iterator_flags(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, void *args), + int flags, void *args, int iter_flags); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, int tag); @@ -102,7 +110,6 @@ xfs_fs_eofblocks_from_user( dst->eof_flags = src->eof_flags; dst->eof_prid = src->eof_prid; dst->eof_min_file_size = src->eof_min_file_size; - dst->eof_scan_owner = NULLFSINO; dst->eof_uid = INVALID_UID; if (src->eof_flags & XFS_EOF_FLAGS_UID) { @@ -120,4 +127,7 @@ xfs_fs_eofblocks_from_user( return 0; } +int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_ino_t ino, bool *inuse); + #endif diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index de32f0fe47c8..ceef77c0416a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -50,6 +50,7 @@ #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_inode_zone; @@ -621,17 +622,17 @@ __xfs_iflock( DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); do { - prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (xfs_isiflocked(ip)) io_schedule(); } while (!xfs_iflock_nowait(ip)); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } STATIC uint _xfs_dic2xflags( - __uint16_t di_flags, + uint16_t di_flags, uint64_t di_flags2, bool has_attr) { @@ -854,8 +855,8 @@ xfs_ialloc( inode->i_version = 1; ip->i_d.di_flags2 = 0; ip->i_d.di_cowextsize = 0; - ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec; - ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec; + ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec; + ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec; } @@ -1615,7 +1616,7 @@ xfs_itruncate_extents( /* Remove all pending CoW reservations. */ error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, - last_block); + last_block, true); if (error) goto out; @@ -1692,32 +1693,34 @@ xfs_release( if (xfs_can_free_eofblocks(ip, false)) { /* + * Check if the inode is being opened, written and closed + * frequently and we have delayed allocation blocks outstanding + * (e.g. streaming writes from the NFS server), truncating the + * blocks past EOF will cause fragmentation to occur. + * + * In this case don't do the truncation, but we have to be + * careful how we detect this case. Blocks beyond EOF show up as + * i_delayed_blks even when the inode is clean, so we need to + * truncate them away first before checking for a dirty release. + * Hence on the first dirty close we will still remove the + * speculative allocation, but after that we will leave it in + * place. + */ + if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) + return 0; + /* * If we can't get the iolock just skip truncating the blocks * past EOF because we could deadlock with the mmap_sem - * otherwise. We'll get another chance to drop them once the + * otherwise. We'll get another chance to drop them once the * last reference to the inode is dropped, so we'll never leak * blocks permanently. - * - * Further, check if the inode is being opened, written and - * closed frequently and we have delayed allocation blocks - * outstanding (e.g. streaming writes from the NFS server), - * truncating the blocks past EOF will cause fragmentation to - * occur. - * - * In this case don't do the truncation, either, but we have to - * be careful how we detect this case. Blocks beyond EOF show - * up as i_delayed_blks even when the inode is clean, so we - * need to truncate them away first before checking for a dirty - * release. Hence on the first dirty close we will still remove - * the speculative allocation, but after that we will leave it - * in place. */ - if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) - return 0; - - error = xfs_free_eofblocks(mp, ip, true); - if (error && error != -EAGAIN) - return error; + if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + error = xfs_free_eofblocks(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + if (error) + return error; + } /* delalloc blocks after truncation means it really is dirty */ if (ip->i_delayed_blks) @@ -1903,9 +1906,13 @@ xfs_inactive( * force is true because we are evicting an inode from the * cache. Post-eof blocks must be freed, lest we end up with * broken free space accounting. + * + * Note: don't bother with iolock here since lockdep complains + * about acquiring it in reclaim context. We have the only + * reference to the inode at this point anyways. */ if (xfs_can_free_eofblocks(ip, true)) - xfs_free_eofblocks(mp, ip, false); + xfs_free_eofblocks(ip); return; } @@ -2479,11 +2486,11 @@ __xfs_iunpin_wait( xfs_iunpin(ip); do { - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (xfs_ipincount(ip)) io_schedule(); } while (xfs_ipincount(ip)); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } void @@ -3482,7 +3489,7 @@ xfs_iflush_int( dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), - mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { + mp, XFS_ERRTAG_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); @@ -3492,7 +3499,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR( (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { + mp, XFS_ERRTAG_IFLUSH_3)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: Bad regular inode %Lu, ptr 0x%p", __func__, ip->i_ino, ip); @@ -3503,7 +3510,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), - mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { + mp, XFS_ERRTAG_IFLUSH_4)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: Bad directory inode %Lu, ptr 0x%p", __func__, ip->i_ino, ip); @@ -3511,8 +3518,7 @@ xfs_iflush_int( } } if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > - ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, - XFS_RANDOM_IFLUSH_5)) { + ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: detected corrupt incore inode %Lu, " "total extents = %d, nblocks = %Ld, ptr 0x%p", @@ -3522,7 +3528,7 @@ xfs_iflush_int( goto corrupt_out; } if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, - mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { + mp, XFS_ERRTAG_IFLUSH_6)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", __func__, ip->i_ino, ip->i_d.di_forkoff, ip); @@ -3541,6 +3547,12 @@ xfs_iflush_int( if (ip->i_d.di_version < 3) ip->i_d.di_flushiter++; + /* Check the inline directory data. */ + if (S_ISDIR(VFS_I(ip)->i_mode) && + ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && + xfs_dir2_sf_verify(ip)) + goto corrupt_out; + /* * Copy the dirty parts of the inode into the on-disk inode. We always * copy out the core of the inode, because if the inode is dirty at all diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 10dcf27b4c85..0ee453de239a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -192,8 +192,8 @@ static inline void xfs_set_projid(struct xfs_inode *ip, prid_t projid) { - ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16); - ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); + ip->i_d.di_projid_hi = (uint16_t) (projid >> 16); + ip->i_d.di_projid_lo = (uint16_t) (projid & 0xffff); } static inline prid_t @@ -216,7 +216,8 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ -#define XFS_INEW (1 << 3) /* inode has just been allocated */ +#define __XFS_INEW_BIT 3 /* inode has just been allocated */ +#define XFS_INEW (1 << __XFS_INEW_BIT) #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -444,9 +445,6 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count, bool *did_zero); -loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start, - loff_t eof, int whence); - /* from xfs_iops.c */ extern void xfs_setup_inode(struct xfs_inode *ip); @@ -464,6 +462,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip) xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(VFS_I(ip)); + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); } static inline void xfs_setup_existing_inode(struct xfs_inode *ip) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index d90e7811ccdd..013cc78d7daf 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -731,22 +731,27 @@ xfs_iflush_done( * holding the lock before removing the inode from the AIL. */ if (need_ail) { - struct xfs_log_item *log_items[need_ail]; - int i = 0; + bool mlip_changed = false; + + /* this is an opencoded batch version of xfs_trans_ail_delete */ spin_lock(&ailp->xa_lock); for (blip = lip; blip; blip = blip->li_bio_list) { - iip = INODE_ITEM(blip); - if (iip->ili_logged && - blip->li_lsn == iip->ili_flush_lsn) { - log_items[i++] = blip; - } - ASSERT(i <= need_ail); + if (INODE_ITEM(blip)->ili_logged && + blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) + mlip_changed |= xfs_ail_delete_one(ailp, blip); } - /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ - xfs_trans_ail_delete_bulk(ailp, log_items, i, - SHUTDOWN_CORRUPT_INCORE); - } + if (mlip_changed) { + if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) + xlog_assign_tail_lsn_locked(ailp->xa_mount); + if (list_empty(&ailp->xa_ail)) + wake_up_all(&ailp->xa_empty); + } + spin_unlock(&ailp->xa_lock); + + if (mlip_changed) + xfs_log_space_wake(ailp->xa_mount); + } /* * clean up and unlock the flush lock now we are done. We can clear the @@ -829,9 +834,7 @@ xfs_inode_item_format_convert( in_f->ilf_dsize = in_f32->ilf_dsize; in_f->ilf_ino = in_f32->ilf_ino; /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f32->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); in_f->ilf_blkno = in_f32->ilf_blkno; in_f->ilf_len = in_f32->ilf_len; in_f->ilf_boffset = in_f32->ilf_boffset; @@ -846,9 +849,7 @@ xfs_inode_item_format_convert( in_f->ilf_dsize = in_f64->ilf_dsize; in_f->ilf_ino = in_f64->ilf_ino; /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f64->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid); in_f->ilf_blkno = in_f64->ilf_blkno; in_f->ilf_len = in_f64->ilf_len; in_f->ilf_boffset = in_f64->ilf_boffset; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c67cfb451fd3..9c0c7a920304 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -41,8 +41,12 @@ #include "xfs_trans.h" #include "xfs_pnfs.h" #include "xfs_acl.h" +#include "xfs_btree.h" +#include <linux/fsmap.h> +#include "xfs_fsmap.h" #include <linux/capability.h> +#include <linux/cred.h> #include <linux/dcache.h> #include <linux/mount.h> #include <linux/namei.h> @@ -116,8 +120,7 @@ xfs_find_handle( handle.ha_fid.fid_pad = 0; handle.ha_fid.fid_gen = inode->i_generation; handle.ha_fid.fid_ino = ip->i_ino; - - hsize = XFS_HSIZE(handle); + hsize = sizeof(xfs_handle_t); } error = -EFAULT; @@ -440,8 +443,8 @@ xfs_attrmulti_attr_get( struct inode *inode, unsigned char *name, unsigned char __user *ubuf, - __uint32_t *len, - __uint32_t flags) + uint32_t *len, + uint32_t flags) { unsigned char *kbuf; int error = -EFAULT; @@ -469,8 +472,8 @@ xfs_attrmulti_attr_set( struct inode *inode, unsigned char *name, const unsigned char __user *ubuf, - __uint32_t len, - __uint32_t flags) + uint32_t len, + uint32_t flags) { unsigned char *kbuf; int error; @@ -495,7 +498,7 @@ int xfs_attrmulti_attr_remove( struct inode *inode, unsigned char *name, - __uint32_t flags) + uint32_t flags) { int error; @@ -873,7 +876,7 @@ xfs_merge_ioc_xflags( STATIC unsigned int xfs_di2lxflags( - __uint16_t di_flags) + uint16_t di_flags) { unsigned int flags = 0; @@ -1284,7 +1287,7 @@ xfs_ioctl_setattr_check_projid( struct fsxattr *fa) { /* Disallow 32bit project ids if projid32bit feature is not enabled. */ - if (fa->fsx_projid > (__uint16_t)-1 && + if (fa->fsx_projid > (uint16_t)-1 && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) return -EINVAL; @@ -1524,7 +1527,7 @@ out_drop_write: } STATIC int -xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +xfs_getbmap_format(void **ap, struct getbmapx *bmv) { struct getbmap __user *base = (struct getbmap __user *)*ap; @@ -1542,10 +1545,11 @@ xfs_ioc_getbmap( unsigned int cmd, void __user *arg) { - struct getbmapx bmx; + struct getbmapx bmx = { 0 }; int error; - if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) + /* struct getbmap is a strict subset of struct getbmapx. */ + if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags))) return -EFAULT; if (bmx.bmv_count < 2) @@ -1567,7 +1571,7 @@ xfs_ioc_getbmap( } STATIC int -xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +xfs_getbmapx_format(void **ap, struct getbmapx *bmv) { struct getbmapx __user *base = (struct getbmapx __user *)*ap; @@ -1607,6 +1611,84 @@ xfs_ioc_getbmapx( return 0; } +struct getfsmap_info { + struct xfs_mount *mp; + struct fsmap_head __user *data; + unsigned int idx; + __u32 last_flags; +}; + +STATIC int +xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv) +{ + struct getfsmap_info *info = priv; + struct fsmap fm; + + trace_xfs_getfsmap_mapping(info->mp, xfm); + + info->last_flags = xfm->fmr_flags; + xfs_fsmap_from_internal(&fm, xfm); + if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm, + sizeof(struct fsmap))) + return -EFAULT; + + return 0; +} + +STATIC int +xfs_ioc_getfsmap( + struct xfs_inode *ip, + struct fsmap_head __user *arg) +{ + struct getfsmap_info info = { NULL }; + struct xfs_fsmap_head xhead = {0}; + struct fsmap_head head; + bool aborted = false; + int error; + + if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) + return -EFAULT; + if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || + memchr_inv(head.fmh_keys[0].fmr_reserved, 0, + sizeof(head.fmh_keys[0].fmr_reserved)) || + memchr_inv(head.fmh_keys[1].fmr_reserved, 0, + sizeof(head.fmh_keys[1].fmr_reserved))) + return -EINVAL; + + xhead.fmh_iflags = head.fmh_iflags; + xhead.fmh_count = head.fmh_count; + xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); + xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); + + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); + + info.mp = ip->i_mount; + info.data = arg; + error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + error = 0; + aborted = true; + } else if (error) + return error; + + /* If we didn't abort, set the "last" flag in the last fmx */ + if (!aborted && info.idx) { + info.last_flags |= FMR_OF_LAST; + if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags, + &info.last_flags, sizeof(info.last_flags))) + return -EFAULT; + } + + /* copy back header */ + head.fmh_entries = xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) + return -EFAULT; + + return 0; +} + int xfs_ioc_swapext( xfs_swapext_t *sxp) @@ -1787,6 +1869,9 @@ xfs_file_ioctl( case XFS_IOC_GETBMAPX: return xfs_ioc_getbmapx(ip, arg); + case FS_IOC_GETFSMAP: + return xfs_ioc_getfsmap(ip, arg); + case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: { @@ -1846,7 +1931,7 @@ xfs_file_ioctl( case XFS_IOC_SET_RESBLKS: { xfs_fsop_resblks_t inout; - __uint64_t in; + uint64_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1932,12 +2017,12 @@ xfs_file_ioctl( } case XFS_IOC_GOINGDOWN: { - __uint32_t in; + uint32_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (get_user(in, (__uint32_t __user *)arg)) + if (get_user(in, (uint32_t __user *)arg)) return -EFAULT; return xfs_fs_goingdown(mp, in); @@ -1952,14 +2037,14 @@ xfs_file_ioctl( if (copy_from_user(&in, arg, sizeof(in))) return -EFAULT; - return xfs_errortag_add(in.errtag, mp); + return xfs_errortag_add(mp, in.errtag); } case XFS_IOC_ERROR_CLEARALL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return xfs_errortag_clearall(mp, 1); + return xfs_errortag_clearall(mp); case XFS_IOC_FREE_EOFBLOCKS: { struct xfs_fs_eofblocks eofb; diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 8b52881bfd90..e86c3ea137d2 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -48,22 +48,22 @@ xfs_attrmulti_attr_get( struct inode *inode, unsigned char *name, unsigned char __user *ubuf, - __uint32_t *len, - __uint32_t flags); + uint32_t *len, + uint32_t flags); extern int xfs_attrmulti_attr_set( struct inode *inode, unsigned char *name, const unsigned char __user *ubuf, - __uint32_t len, - __uint32_t flags); + uint32_t len, + uint32_t flags); extern int xfs_attrmulti_attr_remove( struct inode *inode, unsigned char *name, - __uint32_t flags); + uint32_t flags); extern struct dentry * xfs_handle_to_dentry( diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 7c49938c5aed..fa0bc4d46065 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -20,6 +20,7 @@ #include <linux/mount.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/fsmap.h> #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" @@ -554,6 +555,7 @@ xfs_file_compat_ioctl( case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: + case FS_IOC_GETFSMAP: return xfs_file_ioctl(filp, cmd, p); #ifndef BROKEN_X86_ALIGNMENT /* These are handled fine if no alignment issues */ diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index b1bb45444df8..5492bcf6f442 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -112,9 +112,9 @@ typedef struct compat_xfs_fsop_handlereq { /* The bstat field in the swapext struct needs translation */ typedef struct compat_xfs_swapext { - __int64_t sx_version; /* version */ - __int64_t sx_fdtarget; /* fd of target file */ - __int64_t sx_fdtmp; /* fd of tmp file */ + int64_t sx_version; /* version */ + int64_t sx_fdtarget; /* fd of target file */ + int64_t sx_fdtmp; /* fd of tmp file */ xfs_off_t sx_offset; /* offset into file */ xfs_off_t sx_length; /* leng from offset */ char sx_pad[16]; /* pad space, unused */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1aa3abd67b36..813394c62849 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -162,7 +162,7 @@ xfs_iomap_write_direct( xfs_fileoff_t last_fsb; xfs_filblks_t count_fsb, resaligned; xfs_fsblock_t firstfsb; - xfs_extlen_t extsz, temp; + xfs_extlen_t extsz; int nimaps; int quota_flag; int rt; @@ -203,14 +203,7 @@ xfs_iomap_write_direct( } count_fsb = last_fsb - offset_fsb; ASSERT(count_fsb > 0); - - resaligned = count_fsb; - if (unlikely(extsz)) { - if ((temp = do_mod(offset_fsb, extsz))) - resaligned += temp; - if ((temp = do_mod(resaligned, extsz))) - resaligned += extsz - temp; - } + resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz); if (unlikely(rt)) { resrtextents = qblocks = resaligned; @@ -247,7 +240,7 @@ xfs_iomap_write_direct( */ if (IS_DAX(VFS_I(ip))) { bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; - if (ISUNWRITTEN(imap)) { + if (imap->br_state == XFS_EXT_UNWRITTEN) { tflags |= XFS_TRANS_RESERVE; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; } @@ -550,7 +543,7 @@ xfs_file_iomap_begin_delay( if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + mp, XFS_ERRTAG_BMAPIFORMAT))) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); error = -EFSCORRUPTED; goto out_unlock; @@ -637,6 +630,11 @@ retry: goto out_unlock; } + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -685,7 +683,7 @@ xfs_iomap_write_allocate( int nres; if (whichfork == XFS_COW_FORK) - flags |= XFS_BMAPI_COWFORK; + flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; /* * Make sure that the dquots are there. @@ -947,7 +945,7 @@ static inline bool imap_needs_alloc(struct inode *inode, return !nimaps || imap->br_startblock == HOLESTARTBLOCK || imap->br_startblock == DELAYSTARTBLOCK || - (IS_DAX(inode) && ISUNWRITTEN(imap)); + (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN); } static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) @@ -978,6 +976,7 @@ xfs_file_iomap_begin( int nimaps = 1, error = 0; bool shared = false, trimmed = false; unsigned lockmode; + struct block_device *bdev; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; @@ -996,53 +995,51 @@ xfs_file_iomap_begin( lockmode = xfs_ilock_data_map_shared(ip); } + if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) { + error = -EAGAIN; + goto out_unlock; + } + ASSERT(offset <= mp->m_super->s_maxbytes); if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) length = mp->m_super->s_maxbytes - offset; offset_fsb = XFS_B_TO_FSBT(mp, offset); end_fsb = XFS_B_TO_FSB(mp, offset + length); - if (xfs_is_reflink_inode(ip) && - (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) { - shared = xfs_reflink_find_cow_mapping(ip, offset, &imap); - if (shared) { - xfs_iunlock(ip, lockmode); - goto alloc_done; - } - ASSERT(!isnullstartblock(imap.br_startblock)); - } - error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); if (error) goto out_unlock; - if ((flags & IOMAP_REPORT) || - (xfs_is_reflink_inode(ip) && - (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) { + if (flags & IOMAP_REPORT) { /* Trim the mapping to the nearest shared extent boundary. */ error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); if (error) goto out_unlock; - - /* - * We're here because we're trying to do a directio write to a - * region that isn't aligned to a filesystem block. If the - * extent is shared, fall back to buffered mode to handle the - * RMW. - */ - if (!(flags & IOMAP_REPORT) && shared) { - trace_xfs_reflink_bounce_dio_write(ip, &imap); - error = -EREMCHG; - goto out_unlock; - } } if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { - error = xfs_reflink_reserve_cow(ip, &imap, &shared); - if (error) - goto out_unlock; + if (flags & IOMAP_DIRECT) { + /* + * A reflinked inode will result in CoW alloc. + * FIXME: It could still overwrite on unshared extents + * and not need allocation. + */ + if (flags & IOMAP_NOWAIT) { + error = -EAGAIN; + goto out_unlock; + } + /* may drop and re-acquire the ilock */ + error = xfs_reflink_allocate_cow(ip, &imap, &shared, + &lockmode); + if (error) + goto out_unlock; + } else { + error = xfs_reflink_reserve_cow(ip, &imap, &shared); + if (error) + goto out_unlock; + } end_fsb = imap.br_startoff + imap.br_blockcount; length = XFS_FSB_TO_B(mp, end_fsb) - offset; @@ -1050,6 +1047,14 @@ xfs_file_iomap_begin( if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { /* + * If nowait is set bail since we are going to make + * allocations. + */ + if (flags & IOMAP_NOWAIT) { + error = -EAGAIN; + goto out_unlock; + } + /* * We cap the maximum length we map here to MAX_WRITEBACK_PAGES * pages to keep the chunks of work done where somewhat symmetric * with the work writeback does. This is a completely arbitrary @@ -1071,7 +1076,6 @@ xfs_file_iomap_begin( if (error) return error; -alloc_done: iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); } else { @@ -1082,6 +1086,14 @@ alloc_done: } xfs_bmbt_to_iomap(ip, iomap, &imap); + + /* optionally associate a dax device with the iomap bdev */ + bdev = iomap->bdev; + if (blk_queue_dax(bdev->bd_queue)) + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); + else + iomap->dax_dev = NULL; + if (shared) iomap->flags |= IOMAP_F_SHARED; return 0; @@ -1095,25 +1107,46 @@ xfs_file_iomap_end_delalloc( struct xfs_inode *ip, loff_t offset, loff_t length, - ssize_t written) + ssize_t written, + struct iomap *iomap) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; - start_fsb = XFS_B_TO_FSB(mp, offset + written); + /* + * Behave as if the write failed if drop writes is enabled. Set the NEW + * flag to force delalloc cleanup. + */ + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { + iomap->flags |= IOMAP_F_NEW; + written = 0; + } + + /* + * start_fsb refers to the first unused block after a short write. If + * nothing was written, round offset down to point at the first block in + * the range. + */ + if (unlikely(!written)) + start_fsb = XFS_B_TO_FSBT(mp, offset); + else + start_fsb = XFS_B_TO_FSB(mp, offset + written); end_fsb = XFS_B_TO_FSB(mp, offset + length); /* - * Trim back delalloc blocks if we didn't manage to write the whole - * range reserved. + * Trim delalloc blocks if they were allocated by this write and we + * didn't manage to write the whole range. * * We don't need to care about racing delalloc as we hold i_mutex * across the reserve/allocate/unreserve calls. If there are delalloc * blocks in the range, they are ours. */ - if (start_fsb < end_fsb) { + if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { + truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), + XFS_FSB_TO_B(mp, end_fsb) - 1); + xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmap_punch_delalloc_range(ip, start_fsb, end_fsb - start_fsb); @@ -1138,13 +1171,14 @@ xfs_file_iomap_end( unsigned flags, struct iomap *iomap) { + fs_put_dax(iomap->dax_dev); if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written); + length, written, iomap); return 0; } -struct iomap_ops xfs_iomap_ops = { +const struct iomap_ops xfs_iomap_ops = { .iomap_begin = xfs_file_iomap_begin, .iomap_end = xfs_file_iomap_end, }; @@ -1168,10 +1202,10 @@ xfs_xattr_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - lockmode = xfs_ilock_data_map_shared(ip); + lockmode = xfs_ilock_attr_map_shared(ip); /* if there are no attribute fork or extents, return ENOENT */ - if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { + if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { error = -ENOENT; goto out_unlock; } @@ -1190,6 +1224,6 @@ out_unlock: return error; } -struct iomap_ops xfs_xattr_iomap_ops = { +const struct iomap_ops xfs_xattr_iomap_ops = { .iomap_begin = xfs_xattr_iomap_begin, }; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 6d45cf01fcff..00db3ecea084 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -33,7 +33,27 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); -extern struct iomap_ops xfs_iomap_ops; -extern struct iomap_ops xfs_xattr_iomap_ops; +static inline xfs_filblks_t +xfs_aligned_fsb_count( + xfs_fileoff_t offset_fsb, + xfs_filblks_t count_fsb, + xfs_extlen_t extsz) +{ + if (extsz) { + xfs_extlen_t align; + + align = do_mod(offset_fsb, extsz); + if (align) + count_fsb += align; + align = do_mod(count_fsb, extsz); + if (align) + count_fsb += extsz - align; + } + + return count_fsb; +} + +extern const struct iomap_ops xfs_iomap_ops; +extern const struct iomap_ops xfs_xattr_iomap_ops; #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 22c16155f1b4..469c9fa4c178 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -190,12 +190,12 @@ xfs_generic_create( #ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) goto out_cleanup_inode; } if (acl) { - error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) goto out_cleanup_inode; } @@ -460,7 +460,7 @@ xfs_vn_get_link( if (!dentry) return ERR_PTR(-ECHILD); - link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); + link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL); if (!link) goto out_err; @@ -489,11 +489,12 @@ xfs_vn_get_link_inline( STATIC int xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) + const struct path *path, + struct kstat *stat, + u32 request_mask, + unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -515,6 +516,20 @@ xfs_vn_getattr( stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + if (ip->i_d.di_version == 3) { + if (request_mask & STATX_BTIME) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ip->i_d.di_crtime.t_sec; + stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec; + } + } + + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP) + stat->attributes |= STATX_ATTR_NODUMP; switch (inode->i_mode & S_IFMT) { case S_IFBLK: diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 66e881790c17..c393a2f6d8c3 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -31,7 +31,7 @@ #include "xfs_trace.h" #include "xfs_icache.h" -STATIC int +int xfs_internal_inum( xfs_mount_t *mp, xfs_ino_t ino) @@ -361,7 +361,6 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - size_t irbsize; /* size of irec buffer in bytes */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ int nirbuf; /* size of irbuf */ int ubcount; /* size of user's buffer */ @@ -388,11 +387,10 @@ xfs_bulkstat( *ubcountp = 0; *done = 0; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); if (!irbuf) return -ENOMEM; - - nirbuf = irbsize / sizeof(*irbuf); + nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); /* * Loop over the allocation groups, starting from the last @@ -585,7 +583,7 @@ xfs_inumbers( return error; bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); - buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); + buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); do { struct xfs_inobt_rec_incore r; int stat; diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 6ea8b3912fa4..17e86e0541af 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -96,4 +96,6 @@ xfs_inumbers( void __user *buffer, /* buffer with inode info */ inumbers_fmt_pf formatter); +int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); + #endif /* __XFS_ITABLE_H__ */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 7a989de224f4..9301c5a6060b 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -19,18 +19,11 @@ #define __XFS_LINUX__ #include <linux/types.h> +#include <linux/uuid.h> /* * Kernel specific type declarations for XFS */ -typedef signed char __int8_t; -typedef unsigned char __uint8_t; -typedef signed short int __int16_t; -typedef unsigned short int __uint16_t; -typedef signed int __int32_t; -typedef unsigned int __uint32_t; -typedef signed long long int __int64_t; -typedef unsigned long long int __uint64_t; typedef __s64 xfs_off_t; /* <file offset> type */ typedef unsigned long long xfs_ino_t; /* <inode> type */ @@ -42,7 +35,6 @@ typedef __u32 xfs_nlink_t; #include "kmem.h" #include "mrlock.h" -#include "uuid.h" #include <linux/semaphore.h> #include <linux/mm.h> @@ -55,7 +47,7 @@ typedef __u32 xfs_nlink_t; #include <linux/file.h> #include <linux/swap.h> #include <linux/errno.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/bitops.h> #include <linux/major.h> #include <linux/pagemap.h> @@ -151,7 +143,6 @@ typedef __u32 xfs_nlink_t; #define __return_address __builtin_return_address(0) #define XFS_PROJID_DEFAULT 0 -#define MAXPATHLEN 1024 #define MIN(a,b) (min(a,b)) #define MAX(a,b) (max(a,b)) @@ -186,22 +177,22 @@ extern struct xstats xfsstats; * are converting to the init_user_ns. The uid is later mapped to a particular * user namespace value when crossing the kernel/user boundary. */ -static inline __uint32_t xfs_kuid_to_uid(kuid_t uid) +static inline uint32_t xfs_kuid_to_uid(kuid_t uid) { return from_kuid(&init_user_ns, uid); } -static inline kuid_t xfs_uid_to_kuid(__uint32_t uid) +static inline kuid_t xfs_uid_to_kuid(uint32_t uid) { return make_kuid(&init_user_ns, uid); } -static inline __uint32_t xfs_kgid_to_gid(kgid_t gid) +static inline uint32_t xfs_kgid_to_gid(kgid_t gid) { return from_kgid(&init_user_ns, gid); } -static inline kgid_t xfs_gid_to_kgid(__uint32_t gid) +static inline kgid_t xfs_gid_to_kgid(uint32_t gid) { return make_kgid(&init_user_ns, gid); } @@ -212,88 +203,6 @@ static inline kgid_t xfs_gid_to_kgid(__uint32_t gid) #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() - -/* Move the kernel do_div definition off to one side */ - -#if defined __i386__ -/* For ia32 we need to pull some tricks to get past various versions - * of the compiler which do not like us using do_div in the middle - * of large functions. - */ -static inline __u32 xfs_do_div(void *a, __u32 b, int n) -{ - __u32 mod; - - switch (n) { - case 4: - mod = *(__u32 *)a % b; - *(__u32 *)a = *(__u32 *)a / b; - return mod; - case 8: - { - unsigned long __upper, __low, __high, __mod; - __u64 c = *(__u64 *)a; - __upper = __high = c >> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - *(__u64 *)a = c; - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} - -/* Side effect free 64 bit mod operation */ -static inline __u32 xfs_do_mod(void *a, __u32 b, int n) -{ - switch (n) { - case 4: - return *(__u32 *)a % b; - case 8: - { - unsigned long __upper, __low, __high, __mod; - __u64 c = *(__u64 *)a; - __upper = __high = c >> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} -#else -static inline __u32 xfs_do_div(void *a, __u32 b, int n) -{ - __u32 mod; - - switch (n) { - case 4: - mod = *(__u32 *)a % b; - *(__u32 *)a = *(__u32 *)a / b; - return mod; - case 8: - mod = do_div(*(__u64 *)a, b); - return mod; - } - - /* NOTREACHED */ - return 0; -} - /* Side effect free 64 bit mod operation */ static inline __u32 xfs_do_mod(void *a, __u32 b, int n) { @@ -310,20 +219,17 @@ static inline __u32 xfs_do_mod(void *a, __u32 b, int n) /* NOTREACHED */ return 0; } -#endif -#undef do_div -#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) #define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) -static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) +static inline uint64_t roundup_64(uint64_t x, uint32_t y) { x += y - 1; do_div(x, y); return x * y; } -static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) +static inline uint64_t howmany_64(uint64_t x, uint32_t y) { x += y - 1; do_div(x, y); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b1469f0a91a6..0053bcf2b10a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -434,7 +434,7 @@ xfs_log_reserve( int unit_bytes, int cnt, struct xlog_ticket **ticp, - __uint8_t client, + uint8_t client, bool permanent) { struct xlog *log = mp->m_log; @@ -825,9 +825,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (!error) { /* the data section must be 32 bit size aligned */ struct { - __uint16_t magic; - __uint16_t pad1; - __uint32_t pad2; /* may as well make it 64 bits */ + uint16_t magic; + uint16_t pad1; + uint32_t pad2; /* may as well make it 64 bits */ } magic = { .magic = XLOG_UNMOUNT_TYPE, }; @@ -1189,8 +1189,7 @@ xlog_iodone(xfs_buf_t *bp) * IOABORT state. The IOABORT state is only set in DEBUG mode to inject * CRC errors into log recovery. */ - if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR, - XFS_RANDOM_IODONE_IOERR) || + if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) || iclog->ic_state & XLOG_STATE_IOABORT) { if (iclog->ic_state & XLOG_STATE_IOABORT) iclog->ic_state &= ~XLOG_STATE_IOABORT; @@ -1293,7 +1292,7 @@ void xfs_log_work_queue( struct xfs_mount *mp) { - queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work, + queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work, msecs_to_jiffies(xfs_syncd_centisecs * 10)); } @@ -1665,7 +1664,7 @@ xlog_cksum( char *dp, int size) { - __uint32_t crc; + uint32_t crc; /* first generate the crc for the record header ... */ crc = xfs_start_cksum_update((char *)rhead, @@ -1828,7 +1827,7 @@ xlog_sync( */ dptr = (char *)&iclog->ic_header + count; for (i = 0; i < split; i += BBSIZE) { - __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); + uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); if (++cycle == XLOG_HEADER_MAGIC_NUM) cycle++; *(__be32 *)dptr = cpu_to_be32(cycle); @@ -1842,7 +1841,6 @@ xlog_sync( /* calculcate the checksum */ iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header, iclog->ic_datap, size); -#ifdef DEBUG /* * Intentionally corrupt the log record CRC based on the error injection * frequency, if defined. This facilitates testing log recovery in the @@ -1850,15 +1848,13 @@ xlog_sync( * write on I/O completion and shutdown the fs. The subsequent mount * detects the bad CRC and attempts to recover. */ - if (log->l_badcrc_factor && - (prandom_u32() % log->l_badcrc_factor == 0)) { - iclog->ic_header.h_crc &= 0xAAAAAAAA; + if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { + iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); iclog->ic_state |= XLOG_STATE_IOABORT; xfs_warn(log->l_mp, "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.", be64_to_cpu(iclog->ic_header.h_lsn)); } -#endif bp->b_io_length = BTOBB(count); bp->b_fspriv = iclog; @@ -2024,7 +2020,7 @@ xlog_print_tic_res( }; #undef REG_TYPE_STR - xfs_warn(mp, "xlog_write: reservation summary:"); + xfs_warn(mp, "ticket reservation summary:"); xfs_warn(mp, " unit res = %d bytes", ticket->t_unit_res); xfs_warn(mp, " current res = %d bytes", @@ -2045,10 +2041,55 @@ xlog_print_tic_res( "bad-rtype" : res_type_str[r_type]), ticket->t_res_arr[i].r_len); } +} + +/* + * Print a summary of the transaction. + */ +void +xlog_print_trans( + struct xfs_trans *tp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_log_item_desc *lidp; + + /* dump core transaction and ticket info */ + xfs_warn(mp, "transaction summary:"); + xfs_warn(mp, " flags = 0x%x", tp->t_flags); + + xlog_print_tic_res(mp, tp->t_ticket); - xfs_alert_tag(mp, XFS_PTAG_LOGRES, - "xlog_write: reservation ran out. Need to up reservation"); - xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); + /* dump each log item */ + list_for_each_entry(lidp, &tp->t_items, lid_trans) { + struct xfs_log_item *lip = lidp->lid_item; + struct xfs_log_vec *lv = lip->li_lv; + struct xfs_log_iovec *vec; + int i; + + xfs_warn(mp, "log item: "); + xfs_warn(mp, " type = 0x%x", lip->li_type); + xfs_warn(mp, " flags = 0x%x", lip->li_flags); + if (!lv) + continue; + xfs_warn(mp, " niovecs = %d", lv->lv_niovecs); + xfs_warn(mp, " size = %d", lv->lv_size); + xfs_warn(mp, " bytes = %d", lv->lv_bytes); + xfs_warn(mp, " buf len = %d", lv->lv_buf_len); + + /* dump each iovec for the log item */ + vec = lv->lv_iovecp; + for (i = 0; i < lv->lv_niovecs; i++) { + int dumplen = min(vec->i_len, 32); + + xfs_warn(mp, " iovec[%d]", i); + xfs_warn(mp, " type = 0x%x", vec->i_type); + xfs_warn(mp, " len = %d", vec->i_len); + xfs_warn(mp, " first %d bytes of iovec[%d]:", dumplen, i); + xfs_hex_dump(vec->i_addr, dumplen); + + vec++; + } + } } /* @@ -2321,8 +2362,12 @@ xlog_write( if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) ticket->t_curr_res -= sizeof(xlog_op_header_t); - if (ticket->t_curr_res < 0) + if (ticket->t_curr_res < 0) { + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, + "ctx ticket reservation ran out. Need to up reservation"); xlog_print_tic_res(log->l_mp, ticket); + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + } index = 0; lv = log_vector; @@ -2363,8 +2408,8 @@ xlog_write( } reg = &vecp[index]; - ASSERT(reg->i_len % sizeof(__int32_t) == 0); - ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); + ASSERT(reg->i_len % sizeof(int32_t) == 0); + ASSERT((unsigned long)ptr % sizeof(int32_t) == 0); start_rec_copy = xlog_write_start_rec(ptr, ticket); if (start_rec_copy) { @@ -3143,7 +3188,7 @@ xlog_state_switch_iclogs( /* Round up to next log-sunit */ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && log->l_mp->m_sb.sb_logsunit > 1) { - __uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit); + uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit); log->l_curr_block = roundup(log->l_curr_block, sunit_bb); } @@ -3771,7 +3816,7 @@ xlog_verify_iclog( xlog_in_core_2_t *xhdr; void *base_ptr, *ptr, *p; ptrdiff_t field_offset; - __uint8_t clientid; + uint8_t clientid; int len, i, j, k, op_len; int idx; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index b5e71072fde5..bf212772595c 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -124,7 +124,6 @@ struct xlog_ticket; struct xfs_log_item; struct xfs_item_ops; struct xfs_trans; -struct xfs_log_callback; xfs_lsn_t xfs_log_done(struct xfs_mount *mp, struct xlog_ticket *ticket, @@ -160,7 +159,7 @@ int xfs_log_reserve(struct xfs_mount *mp, int length, int count, struct xlog_ticket **ticket, - __uint8_t clientid, + uint8_t clientid, bool permanent); int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); void xfs_log_unmount(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index a4ab192e1792..fbe72b134bef 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -30,6 +30,9 @@ #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_log_priv.h" +#include "xfs_trace.h" + +struct workqueue_struct *xfs_discard_wq; /* * Allocate a new ticket. Failing to get a new ticket makes it really hard to @@ -407,6 +410,7 @@ xlog_cil_insert_items( int len = 0; int diff_iovecs = 0; int iclog_space; + int iovhdr_res = 0, split_res = 0, ctx_res = 0; ASSERT(tp); @@ -416,30 +420,11 @@ xlog_cil_insert_items( */ xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs); - /* - * Now (re-)position everything modified at the tail of the CIL. - * We do this here so we only need to take the CIL lock once during - * the transaction commit. - */ spin_lock(&cil->xc_cil_lock); - list_for_each_entry(lidp, &tp->t_items, lid_trans) { - struct xfs_log_item *lip = lidp->lid_item; - - /* Skip items which aren't dirty in this transaction. */ - if (!(lidp->lid_flags & XFS_LID_DIRTY)) - continue; - - /* - * Only move the item if it isn't already at the tail. This is - * to prevent a transient list_empty() state when reinserting - * an item that is already the only item in the CIL. - */ - if (!list_is_last(&lip->li_cil, &cil->xc_cil)) - list_move_tail(&lip->li_cil, &cil->xc_cil); - } /* account for space used by new iovec headers */ - len += diff_iovecs * sizeof(xlog_op_header_t); + iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t); + len += iovhdr_res; ctx->nvecs += diff_iovecs; /* attach the transaction to the CIL if it has any busy extents */ @@ -454,28 +439,66 @@ xlog_cil_insert_items( * during the transaction commit. */ if (ctx->ticket->t_curr_res == 0) { - ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; - tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res; + ctx_res = ctx->ticket->t_unit_res; + ctx->ticket->t_curr_res = ctx_res; + tp->t_ticket->t_curr_res -= ctx_res; } /* do we need space for more log record headers? */ iclog_space = log->l_iclog_size - log->l_iclog_hsize; if (len > 0 && (ctx->space_used / iclog_space != (ctx->space_used + len) / iclog_space)) { - int hdrs; - - hdrs = (len + iclog_space - 1) / iclog_space; + split_res = (len + iclog_space - 1) / iclog_space; /* need to take into account split region headers, too */ - hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); - ctx->ticket->t_unit_res += hdrs; - ctx->ticket->t_curr_res += hdrs; - tp->t_ticket->t_curr_res -= hdrs; + split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header); + ctx->ticket->t_unit_res += split_res; + ctx->ticket->t_curr_res += split_res; + tp->t_ticket->t_curr_res -= split_res; ASSERT(tp->t_ticket->t_curr_res >= len); } tp->t_ticket->t_curr_res -= len; ctx->space_used += len; + /* + * If we've overrun the reservation, dump the tx details before we move + * the log items. Shutdown is imminent... + */ + if (WARN_ON(tp->t_ticket->t_curr_res < 0)) { + xfs_warn(log->l_mp, "Transaction log reservation overrun:"); + xfs_warn(log->l_mp, + " log items: %d bytes (iov hdrs: %d bytes)", + len, iovhdr_res); + xfs_warn(log->l_mp, " split region headers: %d bytes", + split_res); + xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res); + xlog_print_trans(tp); + } + + /* + * Now (re-)position everything modified at the tail of the CIL. + * We do this here so we only need to take the CIL lock once during + * the transaction commit. + */ + list_for_each_entry(lidp, &tp->t_items, lid_trans) { + struct xfs_log_item *lip = lidp->lid_item; + + /* Skip items which aren't dirty in this transaction. */ + if (!(lidp->lid_flags & XFS_LID_DIRTY)) + continue; + + /* + * Only move the item if it isn't already at the tail. This is + * to prevent a transient list_empty() state when reinserting + * an item that is already the only item in the CIL. + */ + if (!list_is_last(&lip->li_cil, &cil->xc_cil)) + list_move_tail(&lip->li_cil, &cil->xc_cil); + } + spin_unlock(&cil->xc_cil_lock); + + if (tp->t_ticket->t_curr_res < 0) + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); } static void @@ -491,6 +514,75 @@ xlog_cil_free_logvec( } } +static void +xlog_discard_endio_work( + struct work_struct *work) +{ + struct xfs_cil_ctx *ctx = + container_of(work, struct xfs_cil_ctx, discard_endio_work); + struct xfs_mount *mp = ctx->cil->xc_log->l_mp; + + xfs_extent_busy_clear(mp, &ctx->busy_extents, false); + kmem_free(ctx); +} + +/* + * Queue up the actual completion to a thread to avoid IRQ-safe locking for + * pagb_lock. Note that we need a unbounded workqueue, otherwise we might + * get the execution delayed up to 30 seconds for weird reasons. + */ +static void +xlog_discard_endio( + struct bio *bio) +{ + struct xfs_cil_ctx *ctx = bio->bi_private; + + INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work); + queue_work(xfs_discard_wq, &ctx->discard_endio_work); +} + +static void +xlog_discard_busy_extents( + struct xfs_mount *mp, + struct xfs_cil_ctx *ctx) +{ + struct list_head *list = &ctx->busy_extents; + struct xfs_extent_busy *busyp; + struct bio *bio = NULL; + struct blk_plug plug; + int error = 0; + + ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); + + blk_start_plug(&plug); + list_for_each_entry(busyp, list, list) { + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, + busyp->length); + + error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, 0, &bio); + if (error && error != -EOPNOTSUPP) { + xfs_info(mp, + "discard failed for extent [0x%llx,%u], error %d", + (unsigned long long)busyp->bno, + busyp->length, + error); + break; + } + } + + if (bio) { + bio->bi_private = ctx; + bio->bi_end_io = xlog_discard_endio; + submit_bio(bio); + } else { + xlog_discard_endio_work(&ctx->discard_endio_work); + } + blk_finish_plug(&plug); +} + /* * Mark all items committed and clear busy extents. We free the log vector * chains in a separate pass so that we unpin the log items as quickly as @@ -525,14 +617,10 @@ xlog_cil_committed( xlog_cil_free_logvec(ctx->lv_chain); - if (!list_empty(&ctx->busy_extents)) { - ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); - - xfs_discard_extents(mp, &ctx->busy_extents); - xfs_extent_busy_clear(mp, &ctx->busy_extents, false); - } - - kmem_free(ctx); + if (!list_empty(&ctx->busy_extents)) + xlog_discard_busy_extents(mp, ctx); + else + kmem_free(ctx); } /* @@ -905,6 +993,7 @@ xfs_log_commit_cil( { struct xlog *log = mp->m_log; struct xfs_cil *cil = log->l_cilp; + xfs_lsn_t xc_commit_lsn; /* * Do all necessary memory allocation before we lock the CIL. @@ -918,13 +1007,9 @@ xfs_log_commit_cil( xlog_cil_insert_items(log, tp); - /* check we didn't blow the reservation */ - if (tp->t_ticket->t_curr_res < 0) - xlog_print_tic_res(mp, tp->t_ticket); - - tp->t_commit_lsn = cil->xc_ctx->sequence; + xc_commit_lsn = cil->xc_ctx->sequence; if (commit_lsn) - *commit_lsn = tp->t_commit_lsn; + *commit_lsn = xc_commit_lsn; xfs_log_done(mp, tp->t_ticket, NULL, regrant); xfs_trans_unreserve_and_mod_sb(tp); @@ -940,7 +1025,7 @@ xfs_log_commit_cil( * the log items. This affects (at least) processing of stale buffers, * inodes and EFIs. */ - xfs_trans_free_items(tp, tp->t_commit_lsn, false); + xfs_trans_free_items(tp, xc_commit_lsn, false); xlog_cil_push_background(log); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 2b6eec52178e..51bf7b827387 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -257,6 +257,7 @@ struct xfs_cil_ctx { struct xfs_log_vec *lv_chain; /* logvecs being pushed */ struct xfs_log_callback log_cb; /* completion callback hook. */ struct list_head committing; /* ctx committing list */ + struct work_struct discard_endio_work; }; /* @@ -418,7 +419,7 @@ struct xlog { }; #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ - ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) + ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) @@ -455,6 +456,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) } void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); +void xlog_print_trans(struct xfs_trans *); int xlog_write( struct xlog *log, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4a98762ec8b4..9549188f5a36 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -352,13 +352,13 @@ xlog_header_check_mount( { ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); - if (uuid_is_nil(&head->h_fs_uuid)) { + if (uuid_is_null(&head->h_fs_uuid)) { /* * IRIX doesn't write the h_fs_uuid or h_fmt fields. If - * h_fs_uuid is nil, we assume this log was last mounted + * h_fs_uuid is null, we assume this log was last mounted * by IRIX and continue. */ - xfs_warn(mp, "nil uuid in log - IRIX style log"); + xfs_warn(mp, "null uuid in log - IRIX style log"); } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { xfs_warn(mp, "log has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); @@ -2230,9 +2230,9 @@ xlog_recover_get_buf_lsn( struct xfs_mount *mp, struct xfs_buf *bp) { - __uint32_t magic32; - __uint16_t magic16; - __uint16_t magicda; + uint32_t magic32; + uint16_t magic16; + uint16_t magicda; void *blk = bp->b_addr; uuid_t *uuid; xfs_lsn_t lsn = -1; @@ -2381,9 +2381,9 @@ xlog_recover_validate_buf_type( xfs_lsn_t current_lsn) { struct xfs_da_blkinfo *info = bp->b_addr; - __uint32_t magic32; - __uint16_t magic16; - __uint16_t magicda; + uint32_t magic32; + uint16_t magic16; + uint16_t magicda; char *warnmsg = NULL; /* @@ -2852,7 +2852,7 @@ xlog_recover_buffer_pass2( if (XFS_DINODE_MAGIC == be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, - (__uint32_t)log->l_mp->m_inode_cluster_size))) { + (uint32_t)log->l_mp->m_inode_cluster_size))) { xfs_buf_stale(bp); error = xfs_bwrite(bp); } else { @@ -3423,7 +3423,7 @@ xlog_recover_efd_pass2( xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; - __uint64_t efi_id; + uint64_t efi_id; struct xfs_ail_cursor cur; struct xfs_ail *ailp = log->l_ailp; @@ -3519,7 +3519,7 @@ xlog_recover_rud_pass2( struct xfs_rud_log_format *rud_formatp; struct xfs_rui_log_item *ruip = NULL; struct xfs_log_item *lip; - __uint64_t rui_id; + uint64_t rui_id; struct xfs_ail_cursor cur; struct xfs_ail *ailp = log->l_ailp; @@ -3635,7 +3635,7 @@ xlog_recover_cud_pass2( struct xfs_cud_log_format *cud_formatp; struct xfs_cui_log_item *cuip = NULL; struct xfs_log_item *lip; - __uint64_t cui_id; + uint64_t cui_id; struct xfs_ail_cursor cur; struct xfs_ail *ailp = log->l_ailp; @@ -3754,7 +3754,7 @@ xlog_recover_bud_pass2( struct xfs_bud_log_format *bud_formatp; struct xfs_bui_log_item *buip = NULL; struct xfs_log_item *lip; - __uint64_t bui_id; + uint64_t bui_id; struct xfs_ail_cursor cur; struct xfs_ail *ailp = log->l_ailp; @@ -3796,7 +3796,7 @@ xlog_recover_bud_pass2( * This routine is called when an inode create format structure is found in a * committed transaction in the log. It's purpose is to initialise the inodes * being allocated on disk. This requires us to get inode cluster buffers that - * match the range to be intialised, stamped with inode templates and written + * match the range to be initialised, stamped with inode templates and written * by delayed write so that subsequent modifications will hit the cached buffer * and only need writing out at the end of recovery. */ @@ -4152,7 +4152,7 @@ xlog_recover_commit_trans( #define XLOG_RECOVER_COMMIT_QUEUE_MAX 100 - hlist_del(&trans->r_list); + hlist_del_init(&trans->r_list); error = xlog_recover_reorder_trans(log, trans, pass); if (error) @@ -4354,6 +4354,8 @@ xlog_recover_free_trans( xlog_recover_item_t *item, *n; int i; + hlist_del_init(&trans->r_list); + list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { /* Free the regions in the item. */ list_del(&item->ri_list); @@ -5224,12 +5226,16 @@ xlog_do_recovery_pass( int error2 = 0; int bblks, split_bblks; int hblks, split_hblks, wrapped_hblks; + int i; struct hlist_head rhash[XLOG_RHASH_SIZE]; LIST_HEAD (buffer_list); ASSERT(head_blk != tail_blk); rhead_blk = 0; + for (i = 0; i < XLOG_RHASH_SIZE; i++) + INIT_HLIST_HEAD(&rhash[i]); + /* * Read the header of the tail block and get the iclog buffer size from * h_size. Use this to tell how many sectors make up the log header. @@ -5466,6 +5472,19 @@ xlog_do_recovery_pass( if (error && first_bad) *first_bad = rhead_blk; + /* + * Transactions are freed at commit time but transactions without commit + * records on disk are never committed. Free any that may be left in the + * hash table. + */ + for (i = 0; i < XLOG_RHASH_SIZE; i++) { + struct hlist_node *tmp; + struct xlog_recover *trans; + + hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list) + xlog_recover_free_trans(trans); + } + return error ? error : error2; } @@ -5772,9 +5791,9 @@ xlog_recover_check_summary( xfs_buf_t *agfbp; xfs_buf_t *agibp; xfs_agnumber_t agno; - __uint64_t freeblks; - __uint64_t itotal; - __uint64_t ifree; + uint64_t freeblks; + uint64_t itotal; + uint64_t ifree; int error; mp = log->l_mp; diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 11792d888e4e..e68bd1050eab 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -110,7 +110,10 @@ assfail(char *expr, char *file, int line) { xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", expr, file, line); - BUG(); + if (xfs_globals.bug_on_assert) + BUG(); + else + WARN_ON(1); } void diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9b9540db17a6..40d4e8b4e193 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -45,6 +45,7 @@ #include "xfs_rmap_btree.h" #include "xfs_refcount_btree.h" #include "xfs_reflink.h" +#include "xfs_extent_busy.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); @@ -72,17 +73,20 @@ xfs_uuid_mount( uuid_t *uuid = &mp->m_sb.sb_uuid; int hole, i; + /* Publish UUID in struct super_block */ + uuid_copy(&mp->m_super->s_uuid, uuid); + if (mp->m_flags & XFS_MOUNT_NOUUID) return 0; - if (uuid_is_nil(uuid)) { - xfs_warn(mp, "Filesystem has nil UUID - can't mount"); + if (uuid_is_null(uuid)) { + xfs_warn(mp, "Filesystem has null UUID - can't mount"); return -EINVAL; } mutex_lock(&xfs_uuid_table_mutex); for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) { - if (uuid_is_nil(&xfs_uuid_table[i])) { + if (uuid_is_null(&xfs_uuid_table[i])) { hole = i; continue; } @@ -119,7 +123,7 @@ xfs_uuid_unmount( mutex_lock(&xfs_uuid_table_mutex); for (i = 0; i < xfs_uuid_table_size; i++) { - if (uuid_is_nil(&xfs_uuid_table[i])) + if (uuid_is_null(&xfs_uuid_table[i])) continue; if (!uuid_equal(uuid, &xfs_uuid_table[i])) continue; @@ -169,7 +173,7 @@ xfs_free_perag( int xfs_sb_validate_fsb_count( xfs_sb_t *sbp, - __uint64_t nblocks) + uint64_t nblocks) { ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); ASSERT(sbp->sb_blocklog >= BBSHIFT); @@ -187,7 +191,7 @@ xfs_initialize_perag( xfs_agnumber_t *maxagi) { xfs_agnumber_t index; - xfs_agnumber_t first_initialised = 0; + xfs_agnumber_t first_initialised = NULLAGNUMBER; xfs_perag_t *pag; int error = -ENOMEM; @@ -202,22 +206,21 @@ xfs_initialize_perag( xfs_perag_put(pag); continue; } - if (!first_initialised) - first_initialised = index; pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); if (!pag) - goto out_unwind; + goto out_unwind_new_pags; pag->pag_agno = index; pag->pag_mount = mp; spin_lock_init(&pag->pag_ici_lock); mutex_init(&pag->pag_ici_reclaim_lock); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); if (xfs_buf_hash_init(pag)) - goto out_unwind; + goto out_free_pag; + init_waitqueue_head(&pag->pagb_wait); if (radix_tree_preload(GFP_NOFS)) - goto out_unwind; + goto out_hash_destroy; spin_lock(&mp->m_perag_lock); if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { @@ -225,10 +228,13 @@ xfs_initialize_perag( spin_unlock(&mp->m_perag_lock); radix_tree_preload_end(); error = -EEXIST; - goto out_unwind; + goto out_hash_destroy; } spin_unlock(&mp->m_perag_lock); radix_tree_preload_end(); + /* first new pag is fully initialized */ + if (first_initialised == NULLAGNUMBER) + first_initialised = index; } index = xfs_set_inode_alloc(mp, agcount); @@ -239,11 +245,16 @@ xfs_initialize_perag( mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); return 0; -out_unwind: +out_hash_destroy: xfs_buf_hash_destroy(pag); +out_free_pag: kmem_free(pag); - for (; index > first_initialised; index--) { +out_unwind_new_pags: + /* unwind any prior newly initialized pags */ + for (index = first_initialised; index < agcount; index++) { pag = radix_tree_delete(&mp->m_perag_tree, index); + if (!pag) + break; xfs_buf_hash_destroy(pag); kmem_free(pag); } @@ -424,7 +435,7 @@ STATIC void xfs_set_maxicount(xfs_mount_t *mp) { xfs_sb_t *sbp = &(mp->m_sb); - __uint64_t icount; + uint64_t icount; if (sbp->sb_imax_pct) { /* @@ -490,7 +501,7 @@ xfs_set_low_space_thresholds( int i; for (i = 0; i < XFS_LOWSP_MAX; i++) { - __uint64_t space = mp->m_sb.sb_dblocks; + uint64_t space = mp->m_sb.sb_dblocks; do_div(space, 100); mp->m_low_space[i] = space * (i + 1); @@ -505,8 +516,7 @@ STATIC void xfs_set_inoalignment(xfs_mount_t *mp) { if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; else mp->m_inoalign_mask = 0; @@ -587,10 +597,10 @@ xfs_mount_reset_sbqflags( return xfs_sync_sb(mp, false); } -__uint64_t +uint64_t xfs_default_resblks(xfs_mount_t *mp) { - __uint64_t resblks; + uint64_t resblks; /* * We default to 5% or 8192 fsbs of space reserved, whichever is @@ -601,7 +611,7 @@ xfs_default_resblks(xfs_mount_t *mp) */ resblks = mp->m_sb.sb_dblocks; do_div(resblks, 20); - resblks = min_t(__uint64_t, resblks, 8192); + resblks = min_t(uint64_t, resblks, 8192); return resblks; } @@ -621,7 +631,7 @@ xfs_mountfs( { struct xfs_sb *sbp = &(mp->m_sb); struct xfs_inode *rip; - __uint64_t resblks; + uint64_t resblks; uint quotamount = 0; uint quotaflags = 0; int error = 0; @@ -709,10 +719,13 @@ xfs_mountfs( if (error) goto out_del_stats; + error = xfs_errortag_init(mp); + if (error) + goto out_remove_error_sysfs; error = xfs_uuid_mount(mp); if (error) - goto out_remove_error_sysfs; + goto out_remove_errortag; /* * Set the minimum read and write sizes @@ -782,7 +795,10 @@ xfs_mountfs( * Copies the low order bits of the timestamp and the randomly * set "sequence" number out of a UUID. */ - uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid); + mp->m_fixedfsid[0] = + (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) | + get_unaligned_be16(&sbp->sb_uuid.b[4]); + mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]); mp->m_dmevmask = 0; /* not persistent; set after each mount */ @@ -1031,6 +1047,8 @@ xfs_mountfs( xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); + out_remove_errortag: + xfs_errortag_del(mp); out_remove_error_sysfs: xfs_error_sysfs_del(mp); out_del_stats: @@ -1049,7 +1067,7 @@ void xfs_unmountfs( struct xfs_mount *mp) { - __uint64_t resblks; + uint64_t resblks; int error; cancel_delayed_work_sync(&mp->m_eofblocks_work); @@ -1073,6 +1091,13 @@ xfs_unmountfs( xfs_log_force(mp, XFS_LOG_SYNC); /* + * Wait for all busy extents to be freed, including completion of + * any discard operation. + */ + xfs_extent_busy_wait_all(mp); + flush_workqueue(xfs_discard_wq); + + /* * We now need to tell the world we are unmounting. This will allow * us to detect that the filesystem is going away and we should error * out anything that we have been retrying in the background. This will @@ -1127,10 +1152,11 @@ xfs_unmountfs( xfs_uuid_unmount(mp); #if defined(DEBUG) - xfs_errortag_clearall(mp, 0); + xfs_errortag_clearall(mp); #endif xfs_free_perag(mp); + xfs_errortag_del(mp); xfs_error_sysfs_del(mp); xfs_sysfs_del(&mp->m_stats.xs_kobj); xfs_sysfs_del(&mp->m_kobj); @@ -1191,7 +1217,7 @@ xfs_mod_icount( struct xfs_mount *mp, int64_t delta) { - __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH); + percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH); if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { ASSERT(0); percpu_counter_add(&mp->m_icount, -delta); @@ -1270,7 +1296,7 @@ xfs_mod_fdblocks( else batch = XFS_FDBLOCKS_BATCH; - __percpu_counter_add(&mp->m_fdblocks, delta, batch); + percpu_counter_add_batch(&mp->m_fdblocks, delta, batch); if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside, XFS_FDBLOCKS_BATCH) >= 0) { /* we had space! */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7f351f706b7a..e0792d036be2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -108,10 +108,10 @@ typedef struct xfs_mount { xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ - __uint8_t m_blkbit_log; /* blocklog + NBBY */ - __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ - __uint8_t m_agno_log; /* log #ag's */ - __uint8_t m_agino_log; /* #bits for agino in inum */ + uint8_t m_blkbit_log; /* blocklog + NBBY */ + uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ + uint8_t m_agno_log; /* log #ag's */ + uint8_t m_agino_log; /* #bits for agino in inum */ uint m_inode_cluster_size;/* min inode buf size */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ @@ -139,7 +139,7 @@ typedef struct xfs_mount { struct mutex m_growlock; /* growfs mutex */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ - __uint64_t m_flags; /* global mount flags */ + uint64_t m_flags; /* global mount flags */ bool m_inotbt_nores; /* no per-AG finobt resv. */ int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */ @@ -148,14 +148,14 @@ typedef struct xfs_mount { int m_inoalign_mask;/* mask sb_inoalignmt if used */ uint m_qflags; /* quota status flags */ struct xfs_trans_resv m_resv; /* precomputed res values */ - __uint64_t m_maxicount; /* maximum inode count */ - __uint64_t m_resblks; /* total reserved blocks */ - __uint64_t m_resblks_avail;/* available reserved blocks */ - __uint64_t m_resblks_save; /* reserved blks @ remount,ro */ + uint64_t m_maxicount; /* maximum inode count */ + uint64_t m_resblks; /* total reserved blocks */ + uint64_t m_resblks_avail;/* available reserved blocks */ + uint64_t m_resblks_save; /* reserved blks @ remount,ro */ int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ int m_sinoalign; /* stripe unit inode alignment */ - __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ + uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ @@ -183,6 +183,7 @@ typedef struct xfs_mount { struct workqueue_struct *m_reclaim_workqueue; struct workqueue_struct *m_log_workqueue; struct workqueue_struct *m_eofblocks_workqueue; + struct workqueue_struct *m_sync_workqueue; /* * Generation of the filesysyem layout. This is incremented by each @@ -193,18 +194,17 @@ typedef struct xfs_mount { * ever support shrinks it would have to be persisted in addition * to various other kinds of pain inflicted on the pNFS server. */ - __uint32_t m_generation; + uint32_t m_generation; bool m_fail_unmount; #ifdef DEBUG /* - * DEBUG mode instrumentation to test and/or trigger delayed allocation - * block killing in the event of failed writes. When enabled, all - * buffered writes are forced to fail. All delalloc blocks in the range - * of the write (including pre-existing delalloc blocks!) are tossed as - * part of the write failure error handling sequence. + * Frequency with which errors are injected. Replaces xfs_etest; the + * value stored in here is the inverse of the frequency with which the + * error triggers. 1 = always, 2 = half the time, etc. */ - bool m_fail_writes; + unsigned int *m_errortag; + struct xfs_kobj m_errortag_kobj; #endif } xfs_mount_t; @@ -311,7 +311,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, static inline xfs_agnumber_t xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d) { - xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); + xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d); do_div(ld, mp->m_sb.sb_agblocks); return (xfs_agnumber_t) ld; } @@ -319,24 +319,10 @@ xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d) static inline xfs_agblock_t xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) { - xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); + xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d); return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks); } -#ifdef DEBUG -static inline bool -xfs_mp_fail_writes(struct xfs_mount *mp) -{ - return mp->m_fail_writes; -} -#else -static inline bool -xfs_mp_fail_writes(struct xfs_mount *mp) -{ - return 0; -} -#endif - /* per-AG block reservation data structures*/ enum xfs_ag_resv_type { XFS_AG_RESV_NONE = 0, @@ -365,12 +351,12 @@ typedef struct xfs_perag { char pagi_init; /* this agi's entry is initialized */ char pagf_metadata; /* the agf is preferred to be metadata */ char pagi_inodeok; /* The agi is ok for inodes */ - __uint8_t pagf_levels[XFS_BTNUM_AGF]; + uint8_t pagf_levels[XFS_BTNUM_AGF]; /* # of levels in bno & cnt btree */ - __uint32_t pagf_flcount; /* count of blocks in freelist */ + uint32_t pagf_flcount; /* count of blocks in freelist */ xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ - __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ + uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ xfs_agino_t pagi_freecount; /* number of free inodes */ xfs_agino_t pagi_count; /* number of allocated inodes */ @@ -384,6 +370,8 @@ typedef struct xfs_perag { xfs_agino_t pagl_rightrec; spinlock_t pagb_lock; /* lock for pagb_tree */ struct rb_root pagb_tree; /* ordered tree of busy extents */ + unsigned int pagb_gen; /* generation count for pagb_tree */ + wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */ atomic_t pagf_fstrms; /* # of filestreams active in this AG */ @@ -407,7 +395,7 @@ typedef struct xfs_perag { struct xfs_ag_resv pag_agfl_resv; /* reference count */ - __uint8_t pagf_refcount_level; + uint8_t pagf_refcount_level; } xfs_perag_t; static inline struct xfs_ag_resv * @@ -430,7 +418,7 @@ void xfs_buf_hash_destroy(xfs_perag_t *pag); extern void xfs_uuid_table_free(void); extern int xfs_log_sbcount(xfs_mount_t *); -extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); +extern uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, xfs_agnumber_t *maxagi); @@ -446,7 +434,7 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern bool xfs_fs_writable(struct xfs_mount *mp, int level); -extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); +extern int xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b669b123287b..6ce948c436d5 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -851,8 +851,8 @@ xfs_qm_reset_dqcounts( * started afresh by xfs_qm_quotacheck. */ #ifdef DEBUG - j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - do_div(j, sizeof(xfs_dqblk_t)); + j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) / + sizeof(xfs_dqblk_t); ASSERT(mp->m_quotainfo->qi_dqperchunk == j); #endif dqb = bp->b_addr; @@ -1247,6 +1247,7 @@ xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { + struct xfs_mount *mp = dqp->q_mount; struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; @@ -1257,7 +1258,32 @@ xfs_qm_flush_one( if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; - xfs_dqflock(dqp); + /* + * The only way the dquot is already flush locked by the time quotacheck + * gets here is if reclaim flushed it before the dqadjust walk dirtied + * it for the final time. Quotacheck collects all dquot bufs in the + * local delwri queue before dquots are dirtied, so reclaim can't have + * possibly queued it for I/O. The only way out is to push the buffer to + * cycle the flush lock. + */ + if (!xfs_dqflock_nowait(dqp)) { + /* buf is pinned in-core by delwri list */ + DEFINE_SINGLE_BUF_MAP(map, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen); + bp = _xfs_buf_find(mp->m_ddev_targp, &map, 1, 0, NULL); + if (!bp) { + error = -EINVAL; + goto out_unlock; + } + xfs_buf_unlock(bp); + + xfs_buf_delwri_pushbuf(bp, buffer_list); + xfs_buf_rele(bp); + + error = -EAGAIN; + goto out_unlock; + } + error = xfs_qm_dqflush(dqp, &bp); if (error) goto out_unlock; @@ -1384,12 +1410,7 @@ xfs_qm_quotacheck( mp->m_qflags |= flags; error_return: - while (!list_empty(&buffer_list)) { - struct xfs_buf *bp = - list_first_entry(&buffer_list, struct xfs_buf, b_list); - list_del_init(&bp->b_list); - xfs_buf_relse(bp); - } + xfs_buf_delwri_cancel(&buffer_list); if (error) { xfs_warn(mp, diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 3e52d5de7ae1..2be6d2735ca9 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -33,7 +33,7 @@ xfs_fill_statvfs_from_dquot( struct kstatfs *statp, struct xfs_dquot *dqp) { - __uint64_t limit; + uint64_t limit; limit = dqp->q_core.d_blk_softlimit ? be64_to_cpu(dqp->q_core.d_blk_softlimit) : diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 475a3882a81f..9cb5c381b01c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -759,5 +759,6 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); + xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL, + XFS_AGITER_INEW_WAIT); } diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index f82d79a8c694..de9493253edf 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -269,7 +269,6 @@ xfs_fs_get_nextdqblk( /* ID may be different, so convert back what we got */ *qid = make_kqid(current_user_ns(), qid->type, id); return 0; - } STATIC int diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 6e4c7446c3d4..96fe209b5eb6 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -221,6 +221,7 @@ void xfs_cui_release( struct xfs_cui_log_item *cuip) { + ASSERT(atomic_read(&cuip->cui_refcount) > 0); if (atomic_dec_and_test(&cuip->cui_refcount)) { xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); xfs_cui_item_free(cuip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 07593a362cd0..ab2270a87196 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -82,11 +82,22 @@ * mappings are a reservation against the free space in the filesystem; * adjacent mappings can also be combined into fewer larger mappings. * + * As an optimization, the CoW extent size hint (cowextsz) creates + * outsized aligned delalloc reservations in the hope of landing out of + * order nearby CoW writes in a single extent on disk, thereby reducing + * fragmentation and improving future performance. + * + * D: --RRRRRRSSSRRRRRRRR--- (data fork) + * C: ------DDDDDDD--------- (CoW fork) + * * When dirty pages are being written out (typically in writepage), the - * delalloc reservations are converted into real mappings by allocating - * blocks and replacing the delalloc mapping with real ones. A delalloc - * mapping can be replaced by several real ones if the free space is - * fragmented. + * delalloc reservations are converted into unwritten mappings by + * allocating blocks and replacing the delalloc mapping with real ones. + * A delalloc mapping can be replaced by several unwritten ones if the + * free space is fragmented. + * + * D: --RRRRRRSSSRRRRRRRR--- + * C: ------UUUUUUU--------- * * We want to adapt the delalloc mechanism for copy-on-write, since the * write paths are similar. The first two steps (creating the reservation @@ -101,13 +112,29 @@ * Block-aligned directio writes will use the same mechanism as buffered * writes. * + * Just prior to submitting the actual disk write requests, we convert + * the extents representing the range of the file actually being written + * (as opposed to extra pieces created for the cowextsize hint) to real + * extents. This will become important in the next step: + * + * D: --RRRRRRSSSRRRRRRRR--- + * C: ------UUrrUUU--------- + * * CoW remapping must be done after the data block write completes, * because we don't want to destroy the old data fork map until we're sure * the new block has been written. Since the new mappings are kept in a * separate fork, we can simply iterate these mappings to find the ones * that cover the file blocks that we just CoW'd. For each extent, simply * unmap the corresponding range in the data fork, map the new range into - * the data fork, and remove the extent from the CoW fork. + * the data fork, and remove the extent from the CoW fork. Because of + * the presence of the cowextsize hint, however, we must be careful + * only to remap the blocks that we've actually written out -- we must + * never remap delalloc reservations nor CoW staging blocks that have + * yet to be written. This corresponds exactly to the real extents in + * the CoW fork: + * + * D: --RRRRRRrrSRRRRRRRR--- + * C: ------UU--UUU--------- * * Since the remapping operation can be applied to an arbitrary file * range, we record the need for the remap step as a flag in the ioend @@ -128,6 +155,7 @@ int xfs_reflink_find_shared( struct xfs_mount *mp, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen, @@ -139,18 +167,18 @@ xfs_reflink_find_shared( struct xfs_btree_cur *cur; int error; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen, find_end_of_shared); xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); return error; } @@ -179,11 +207,7 @@ xfs_reflink_trim_around_shared( int error = 0; /* Holes, unwritten, and delalloc extents cannot be shared */ - if (!xfs_is_reflink_inode(ip) || - ISUNWRITTEN(irec) || - irec->br_startblock == HOLESTARTBLOCK || - irec->br_startblock == DELAYSTARTBLOCK || - isnullstartblock(irec->br_startblock)) { + if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) { *shared = false; return 0; } @@ -194,7 +218,7 @@ xfs_reflink_trim_around_shared( agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock); aglen = irec->br_blockcount; - error = xfs_reflink_find_shared(ip->i_mount, agno, agbno, + error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno, aglen, &fbno, &flen, true); if (error) return error; @@ -296,103 +320,165 @@ xfs_reflink_reserve_cow( return 0; } -/* Allocate all CoW reservations covering a range of blocks in a file. */ -static int -__xfs_reflink_allocate_cow( - struct xfs_inode *ip, - xfs_fileoff_t *offset_fsb, - xfs_fileoff_t end_fsb) +/* Convert part of an unwritten CoW extent to a real one. */ +STATIC int +xfs_reflink_convert_cow_extent( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + xfs_fileoff_t offset_fsb, + xfs_filblks_t count_fsb, + struct xfs_defer_ops *dfops) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_bmbt_irec imap; - struct xfs_defer_ops dfops; - struct xfs_trans *tp; - xfs_fsblock_t first_block; - int nimaps = 1, error; - bool shared; + xfs_fsblock_t first_block; + int nimaps = 1; - xfs_defer_init(&dfops, &first_block); - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, - XFS_TRANS_RESERVE, &tp); - if (error) - return error; + if (imap->br_state == XFS_EXT_NORM) + return 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trim_extent(imap, offset_fsb, count_fsb); + trace_xfs_reflink_convert_cow(ip, imap); + if (imap->br_blockcount == 0) + return 0; + return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount, + XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, + 0, imap, &nimaps, dfops); +} - /* Read extent from the source file. */ - nimaps = 1; - error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, - &imap, &nimaps, 0); - if (error) - goto out_unlock; - ASSERT(nimaps == 1); +/* Convert all of the unwritten CoW extents in a file's range to real ones. */ +int +xfs_reflink_convert_cow( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + struct xfs_bmbt_irec got; + struct xfs_defer_ops dfops; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); + xfs_extnum_t idx; + bool found; + int error = 0; - error = xfs_reflink_reserve_cow(ip, &imap, &shared); - if (error) - goto out_trans_cancel; + xfs_ilock(ip, XFS_ILOCK_EXCL); - if (!shared) { - *offset_fsb = imap.br_startoff + imap.br_blockcount; - goto out_trans_cancel; + /* Convert all the extents to real from unwritten. */ + for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); + found && got.br_startoff < end_fsb; + found = xfs_iext_get_extent(ifp, ++idx, &got)) { + error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, + end_fsb - offset_fsb, &dfops); + if (error) + break; } - xfs_trans_ijoin(tp, ip, 0); - error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, - XFS_BMAPI_COWFORK, &first_block, - XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), - &imap, &nimaps, &dfops); - if (error) - goto out_trans_cancel; - - error = xfs_defer_finish(&tp, &dfops, NULL); - if (error) - goto out_trans_cancel; - - error = xfs_trans_commit(tp); - - *offset_fsb = imap.br_startoff + imap.br_blockcount; -out_unlock: + /* Finish up. */ xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; -out_trans_cancel: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_unlock; } -/* Allocate all CoW reservations covering a part of a file. */ +/* Allocate all CoW reservations covering a range of blocks in a file. */ int -xfs_reflink_allocate_cow_range( +xfs_reflink_allocate_cow( struct xfs_inode *ip, - xfs_off_t offset, - xfs_off_t count) + struct xfs_bmbt_irec *imap, + bool *shared, + uint *lockmode) { struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); - int error; + xfs_fileoff_t offset_fsb = imap->br_startoff; + xfs_filblks_t count_fsb = imap->br_blockcount; + struct xfs_bmbt_irec got; + struct xfs_defer_ops dfops; + struct xfs_trans *tp = NULL; + xfs_fsblock_t first_block; + int nimaps, error = 0; + bool trimmed; + xfs_filblks_t resaligned; + xfs_extlen_t resblks = 0; + xfs_extnum_t idx; +retry: ASSERT(xfs_is_reflink_inode(ip)); - - trace_xfs_reflink_allocate_cow_range(ip, offset, count); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); /* - * Make sure that the dquots are there. + * Even if the extent is not shared we might have a preallocation for + * it in the COW fork. If so use it. */ - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; + if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) && + got.br_startoff <= offset_fsb) { + *shared = true; - while (offset_fsb < end_fsb) { - error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb); - if (error) { - trace_xfs_reflink_allocate_cow_range_error(ip, error, - _RET_IP_); - break; + /* If we have a real allocation in the COW fork we're done. */ + if (!isnullstartblock(got.br_startblock)) { + xfs_trim_extent(&got, offset_fsb, count_fsb); + *imap = got; + goto convert; } + + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + } else { + error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); + if (error || !*shared) + goto out; + } + + if (!tp) { + resaligned = xfs_aligned_fsb_count(imap->br_startoff, + imap->br_blockcount, xfs_get_cowextsz_hint(ip)); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); + + xfs_iunlock(ip, *lockmode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + *lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, *lockmode); + + if (error) + return error; + + error = xfs_qm_dqattach_locked(ip, 0); + if (error) + goto out; + goto retry; } + error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto out; + + xfs_trans_ijoin(tp, ip, 0); + + xfs_defer_init(&dfops, &first_block); + nimaps = 1; + + /* Allocate the entire reservation as unwritten blocks. */ + error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, + resblks, imap, &nimaps, &dfops); + if (error) + goto out_bmap_cancel; + + /* Finish up. */ + error = xfs_defer_finish(&tp, &dfops, NULL); + if (error) + goto out_bmap_cancel; + + error = xfs_trans_commit(tp); + if (error) + return error; +convert: + return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, + &dfops); +out_bmap_cancel: + xfs_defer_cancel(&dfops); + xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, + XFS_QMOPT_RES_REGBLKS); +out: + if (tp) + xfs_trans_cancel(tp); return error; } @@ -459,14 +545,18 @@ xfs_reflink_trim_irec_to_next_cow( } /* - * Cancel all pending CoW reservations for some block range of an inode. + * Cancel CoW reservations for some block range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_blocks( struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb) + xfs_fileoff_t end_fsb, + bool cancel_real) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; @@ -490,7 +580,7 @@ xfs_reflink_cancel_cow_blocks( &idx, &got, &del); if (error) break; - } else { + } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); @@ -532,13 +622,17 @@ xfs_reflink_cancel_cow_blocks( } /* - * Cancel all pending CoW reservations for some byte range of an inode. + * Cancel CoW reservations for some byte range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_range( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool cancel_real) { struct xfs_trans *tp; xfs_fileoff_t offset_fsb; @@ -564,7 +658,8 @@ xfs_reflink_cancel_cow_range( xfs_trans_ijoin(tp, ip, 0); /* Scrape out the old CoW reservations */ - error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, + cancel_real); if (error) goto out_cancel; @@ -611,8 +706,22 @@ xfs_reflink_end_cow( offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); - /* Start a rolling transaction to switch the mappings */ - resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); + /* + * Start a rolling transaction to switch the mappings. We're + * unlikely ever to have to remap 16T worth of single-block + * extents, so just cap the worst case extent count to 2^32-1. + * Stick a warning in just in case, and avoid 64-bit division. + */ + BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX); + if (end_fsb - offset_fsb > UINT_MAX) { + error = -EFSCORRUPTED; + xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE); + ASSERT(0); + goto out; + } + resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount, + (unsigned int)(end_fsb - offset_fsb), + XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, resblks, 0, 0, &tp); if (error) @@ -641,6 +750,16 @@ xfs_reflink_end_cow( ASSERT(!isnullstartblock(got.br_startblock)); + /* + * Don't remap unwritten extents; these are + * speculatively preallocated CoW extents that have been + * allocated but have not yet been involved in a write. + */ + if (got.br_state == XFS_EXT_UNWRITTEN) { + idx--; + goto next_extent; + } + /* Unmap the old blocks in the data fork. */ xfs_defer_init(&dfops, &firstfsb); rlen = del.br_blockcount; @@ -855,13 +974,14 @@ STATIC int xfs_reflink_update_dest( struct xfs_inode *dest, xfs_off_t newlen, - xfs_extlen_t cowextsize) + xfs_extlen_t cowextsize, + bool is_dedupe) { struct xfs_mount *mp = dest->i_mount; struct xfs_trans *tp; int error; - if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); @@ -882,6 +1002,10 @@ xfs_reflink_update_dest( dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; } + if (!is_dedupe) { + xfs_trans_ichgtime(tp, dest, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); error = xfs_trans_commit(tp); @@ -932,12 +1056,12 @@ xfs_reflink_remap_extent( xfs_off_t new_isize) { struct xfs_mount *mp = ip->i_mount; + bool real_extent = xfs_bmap_is_real_extent(irec); struct xfs_trans *tp; xfs_fsblock_t firstfsb; unsigned int resblks; struct xfs_defer_ops dfops; struct xfs_bmbt_irec uirec; - bool real_extent; xfs_filblks_t rlen; xfs_filblks_t unmap_len; xfs_off_t newlen; @@ -946,11 +1070,6 @@ xfs_reflink_remap_extent( unmap_len = irec->br_startoff + irec->br_blockcount - destoff; trace_xfs_reflink_punch_range(ip, destoff, unmap_len); - /* Only remap normal extents. */ - real_extent = (irec->br_startblock != HOLESTARTBLOCK && - irec->br_startblock != DELAYSTARTBLOCK && - !ISUNWRITTEN(irec)); - /* No reflinking if we're low on space */ if (real_extent) { error = xfs_reflink_ag_has_free_space(mp, @@ -1195,7 +1314,8 @@ xfs_reflink_remap_range( !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) cowextsize = src->i_d.di_cowextsize; - ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, + is_dedupe); out_unlock: xfs_iunlock(src, XFS_MMAPLOCK_EXCL); @@ -1245,9 +1365,7 @@ xfs_reflink_dirty_extents( goto out; if (nmaps == 0) break; - if (map[0].br_startblock == HOLESTARTBLOCK || - map[0].br_startblock == DELAYSTARTBLOCK || - ISUNWRITTEN(&map[0])) + if (!xfs_bmap_is_real_extent(&map[0])) goto next; map[1] = map[0]; @@ -1256,8 +1374,8 @@ xfs_reflink_dirty_extents( agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock); aglen = map[1].br_blockcount; - error = xfs_reflink_find_shared(mp, agno, agbno, aglen, - &rbno, &rlen, true); + error = xfs_reflink_find_shared(mp, NULL, agno, agbno, + aglen, &rbno, &rlen, true); if (error) goto out; if (rbno == NULLAGBLOCK) @@ -1288,64 +1406,78 @@ out: return error; } -/* Clear the inode reflink flag if there are no shared extents. */ +/* Does this inode need the reflink flag? */ int -xfs_reflink_clear_inode_flag( - struct xfs_inode *ip, - struct xfs_trans **tpp) +xfs_reflink_inode_has_shared_extents( + struct xfs_trans *tp, + struct xfs_inode *ip, + bool *has_shared) { - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t fbno; - xfs_filblks_t end; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_extlen_t aglen; - xfs_agblock_t rbno; - xfs_extlen_t rlen; - struct xfs_bmbt_irec map; - int nmaps; - int error = 0; - - ASSERT(xfs_is_reflink_inode(ip)); + struct xfs_bmbt_irec got; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t aglen; + xfs_agblock_t rbno; + xfs_extlen_t rlen; + xfs_extnum_t idx; + bool found; + int error; - fbno = 0; - end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip))); - while (end - fbno > 0) { - nmaps = 1; - /* - * Look for extents in the file. Skip holes, delalloc, or - * unwritten extents; they can't be reflinked. - */ - error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); if (error) return error; - if (nmaps == 0) - break; - if (map.br_startblock == HOLESTARTBLOCK || - map.br_startblock == DELAYSTARTBLOCK || - ISUNWRITTEN(&map)) - goto next; + } - agno = XFS_FSB_TO_AGNO(mp, map.br_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock); - aglen = map.br_blockcount; + *has_shared = false; + found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got); + while (found) { + if (isnullstartblock(got.br_startblock) || + got.br_state != XFS_EXT_NORM) + goto next; + agno = XFS_FSB_TO_AGNO(mp, got.br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock); + aglen = got.br_blockcount; - error = xfs_reflink_find_shared(mp, agno, agbno, aglen, + error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen, &rbno, &rlen, false); if (error) return error; /* Is there still a shared block here? */ - if (rbno != NULLAGBLOCK) + if (rbno != NULLAGBLOCK) { + *has_shared = true; return 0; + } next: - fbno = map.br_startoff + map.br_blockcount; + found = xfs_iext_get_extent(ifp, ++idx, &got); } + return 0; +} + +/* Clear the inode reflink flag if there are no shared extents. */ +int +xfs_reflink_clear_inode_flag( + struct xfs_inode *ip, + struct xfs_trans **tpp) +{ + bool needs_flag; + int error = 0; + + ASSERT(xfs_is_reflink_inode(ip)); + + error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag); + if (error || needs_flag) + return error; + /* * We didn't find any shared blocks so turn off the reflink flag. * First, get rid of any leftover CoW mappings. */ - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); if (error) return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index aa6a4d64bd35..701487bab468 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -20,16 +20,18 @@ #ifndef __XFS_REFLINK_H #define __XFS_REFLINK_H 1 -extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, - xfs_extlen_t *flen, bool find_maximal); +extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen, + xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal); extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared); -extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, - xfs_off_t offset, xfs_off_t count); +extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode); +extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t count); extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, struct xfs_bmbt_irec *imap); extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, @@ -37,14 +39,16 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb); + xfs_fileoff_t end_fsb, bool cancel_real); extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count); + xfs_off_t count, bool cancel_real); extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); +extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, + struct xfs_inode *ip, bool *has_shared); extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, struct xfs_trans **tpp); extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 73c827831551..f3b139c9aa16 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -243,6 +243,7 @@ void xfs_rui_release( struct xfs_rui_log_item *ruip) { + ASSERT(atomic_read(&ruip->rui_refcount) > 0); if (atomic_dec_and_test(&ruip->rui_refcount)) { xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); xfs_rui_item_free(ruip); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 802bcc326d9f..91472193643b 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1093,7 +1093,6 @@ xfs_rtallocate_extent( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */ int wasdel, /* was a delayed allocation extent */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -1123,27 +1122,16 @@ xfs_rtallocate_extent( } } +retry: sumbp = NULL; - /* - * Allocate by size, or near another block, or exactly at some block. - */ - switch (type) { - case XFS_ALLOCTYPE_ANY_AG: + if (bno == 0) { error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len, &sumbp, &sb, prod, &r); - break; - case XFS_ALLOCTYPE_NEAR_BNO: + } else { error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen, len, &sumbp, &sb, prod, &r); - break; - case XFS_ALLOCTYPE_THIS_BNO: - error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, - len, &sumbp, &sb, prod, &r); - break; - default: - error = -EIO; - ASSERT(0); } + if (error) return error; @@ -1158,7 +1146,11 @@ xfs_rtallocate_extent( xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FREXTENTS, -slen); else xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -slen); + } else if (prod > 1) { + prod = 1; + goto retry; } + *rtblock = r; return 0; } @@ -1264,13 +1256,13 @@ xfs_rtpick_extent( { xfs_rtblock_t b; /* result block */ int log2; /* log of sequence number */ - __uint64_t resid; /* residual after log removed */ - __uint64_t seq; /* sequence number of file creation */ - __uint64_t *seqp; /* pointer to seqno in inode */ + uint64_t resid; /* residual after log removed */ + uint64_t seq; /* sequence number of file creation */ + uint64_t *seqp; /* pointer to seqno in inode */ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); - seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime; + seqp = (uint64_t *)&VFS_I(mp->m_rbmip)->i_atime; if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) { mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; *seqp = 0; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 355dd9e1cb64..79defa722bf1 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -23,6 +23,16 @@ struct xfs_mount; struct xfs_trans; +struct xfs_rtalloc_rec { + xfs_rtblock_t ar_startblock; + xfs_rtblock_t ar_blockcount; +}; + +typedef int (*xfs_rtalloc_query_range_fn)( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv); + #ifdef CONFIG_XFS_RT /* * Function prototypes for exported functions. @@ -40,7 +50,6 @@ xfs_rtallocate_extent( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */ int wasdel, /* was a delayed allocation extent */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock); /* out: start block allocated */ @@ -98,6 +107,8 @@ xfs_growfs_rt( /* * From xfs_rtbitmap.c */ +int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t block, int issum, struct xfs_buf **bpp); int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, int val, xfs_rtblock_t *new, int *stat); @@ -119,13 +130,22 @@ int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, struct xfs_buf **rbpp, xfs_fsblock_t *rsb); - - +int xfs_rtalloc_query_range(struct xfs_trans *tp, + struct xfs_rtalloc_rec *low_rec, + struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, + void *priv); +int xfs_rtalloc_query_all(struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, + void *priv); #else -# define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb) (ENOSYS) +# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) # define xfs_rtfree_extent(t,b,l) (ENOSYS) # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) # define xfs_growfs_rt(mp,in) (ENOSYS) +# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) +# define xfs_rtalloc_query_all(t,f,p) (ENOSYS) +# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) static inline int /* error */ xfs_rtmount_init( xfs_mount_t *mp) /* file system mount structure */ diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index f11282c96887..056e12b421eb 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -33,9 +33,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { int i, j; int len = 0; - __uint64_t xs_xstrat_bytes = 0; - __uint64_t xs_write_bytes = 0; - __uint64_t xs_read_bytes = 0; + uint64_t xs_xstrat_bytes = 0; + uint64_t xs_write_bytes = 0; + uint64_t xs_read_bytes = 0; static const struct xstats_entry { char *desc; @@ -100,7 +100,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) void xfs_stats_clearall(struct xfsstats __percpu *stats) { int c; - __uint32_t vn_active; + uint32_t vn_active; xfs_notice(NULL, "Clearing xfsstats"); for_each_possible_cpu(c) { diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 375840f5a99a..f64d0ae345c4 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -54,125 +54,125 @@ enum { */ struct __xfsstats { # define XFSSTAT_END_EXTENT_ALLOC 4 - __uint32_t xs_allocx; - __uint32_t xs_allocb; - __uint32_t xs_freex; - __uint32_t xs_freeb; + uint32_t xs_allocx; + uint32_t xs_allocb; + uint32_t xs_freex; + uint32_t xs_freeb; # define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) - __uint32_t xs_abt_lookup; - __uint32_t xs_abt_compare; - __uint32_t xs_abt_insrec; - __uint32_t xs_abt_delrec; + uint32_t xs_abt_lookup; + uint32_t xs_abt_compare; + uint32_t xs_abt_insrec; + uint32_t xs_abt_delrec; # define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) - __uint32_t xs_blk_mapr; - __uint32_t xs_blk_mapw; - __uint32_t xs_blk_unmap; - __uint32_t xs_add_exlist; - __uint32_t xs_del_exlist; - __uint32_t xs_look_exlist; - __uint32_t xs_cmp_exlist; + uint32_t xs_blk_mapr; + uint32_t xs_blk_mapw; + uint32_t xs_blk_unmap; + uint32_t xs_add_exlist; + uint32_t xs_del_exlist; + uint32_t xs_look_exlist; + uint32_t xs_cmp_exlist; # define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) - __uint32_t xs_bmbt_lookup; - __uint32_t xs_bmbt_compare; - __uint32_t xs_bmbt_insrec; - __uint32_t xs_bmbt_delrec; + uint32_t xs_bmbt_lookup; + uint32_t xs_bmbt_compare; + uint32_t xs_bmbt_insrec; + uint32_t xs_bmbt_delrec; # define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) - __uint32_t xs_dir_lookup; - __uint32_t xs_dir_create; - __uint32_t xs_dir_remove; - __uint32_t xs_dir_getdents; + uint32_t xs_dir_lookup; + uint32_t xs_dir_create; + uint32_t xs_dir_remove; + uint32_t xs_dir_getdents; # define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) - __uint32_t xs_trans_sync; - __uint32_t xs_trans_async; - __uint32_t xs_trans_empty; + uint32_t xs_trans_sync; + uint32_t xs_trans_async; + uint32_t xs_trans_empty; # define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) - __uint32_t xs_ig_attempts; - __uint32_t xs_ig_found; - __uint32_t xs_ig_frecycle; - __uint32_t xs_ig_missed; - __uint32_t xs_ig_dup; - __uint32_t xs_ig_reclaims; - __uint32_t xs_ig_attrchg; + uint32_t xs_ig_attempts; + uint32_t xs_ig_found; + uint32_t xs_ig_frecycle; + uint32_t xs_ig_missed; + uint32_t xs_ig_dup; + uint32_t xs_ig_reclaims; + uint32_t xs_ig_attrchg; # define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) - __uint32_t xs_log_writes; - __uint32_t xs_log_blocks; - __uint32_t xs_log_noiclogs; - __uint32_t xs_log_force; - __uint32_t xs_log_force_sleep; + uint32_t xs_log_writes; + uint32_t xs_log_blocks; + uint32_t xs_log_noiclogs; + uint32_t xs_log_force; + uint32_t xs_log_force_sleep; # define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) - __uint32_t xs_try_logspace; - __uint32_t xs_sleep_logspace; - __uint32_t xs_push_ail; - __uint32_t xs_push_ail_success; - __uint32_t xs_push_ail_pushbuf; - __uint32_t xs_push_ail_pinned; - __uint32_t xs_push_ail_locked; - __uint32_t xs_push_ail_flushing; - __uint32_t xs_push_ail_restarts; - __uint32_t xs_push_ail_flush; + uint32_t xs_try_logspace; + uint32_t xs_sleep_logspace; + uint32_t xs_push_ail; + uint32_t xs_push_ail_success; + uint32_t xs_push_ail_pushbuf; + uint32_t xs_push_ail_pinned; + uint32_t xs_push_ail_locked; + uint32_t xs_push_ail_flushing; + uint32_t xs_push_ail_restarts; + uint32_t xs_push_ail_flush; # define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) - __uint32_t xs_xstrat_quick; - __uint32_t xs_xstrat_split; + uint32_t xs_xstrat_quick; + uint32_t xs_xstrat_split; # define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) - __uint32_t xs_write_calls; - __uint32_t xs_read_calls; + uint32_t xs_write_calls; + uint32_t xs_read_calls; # define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) - __uint32_t xs_attr_get; - __uint32_t xs_attr_set; - __uint32_t xs_attr_remove; - __uint32_t xs_attr_list; + uint32_t xs_attr_get; + uint32_t xs_attr_set; + uint32_t xs_attr_remove; + uint32_t xs_attr_list; # define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) - __uint32_t xs_iflush_count; - __uint32_t xs_icluster_flushcnt; - __uint32_t xs_icluster_flushinode; + uint32_t xs_iflush_count; + uint32_t xs_icluster_flushcnt; + uint32_t xs_icluster_flushinode; # define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) - __uint32_t vn_active; /* # vnodes not on free lists */ - __uint32_t vn_alloc; /* # times vn_alloc called */ - __uint32_t vn_get; /* # times vn_get called */ - __uint32_t vn_hold; /* # times vn_hold called */ - __uint32_t vn_rele; /* # times vn_rele called */ - __uint32_t vn_reclaim; /* # times vn_reclaim called */ - __uint32_t vn_remove; /* # times vn_remove called */ - __uint32_t vn_free; /* # times vn_free called */ + uint32_t vn_active; /* # vnodes not on free lists */ + uint32_t vn_alloc; /* # times vn_alloc called */ + uint32_t vn_get; /* # times vn_get called */ + uint32_t vn_hold; /* # times vn_hold called */ + uint32_t vn_rele; /* # times vn_rele called */ + uint32_t vn_reclaim; /* # times vn_reclaim called */ + uint32_t vn_remove; /* # times vn_remove called */ + uint32_t vn_free; /* # times vn_free called */ #define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) - __uint32_t xb_get; - __uint32_t xb_create; - __uint32_t xb_get_locked; - __uint32_t xb_get_locked_waited; - __uint32_t xb_busy_locked; - __uint32_t xb_miss_locked; - __uint32_t xb_page_retries; - __uint32_t xb_page_found; - __uint32_t xb_get_read; + uint32_t xb_get; + uint32_t xb_create; + uint32_t xb_get_locked; + uint32_t xb_get_locked_waited; + uint32_t xb_busy_locked; + uint32_t xb_miss_locked; + uint32_t xb_page_retries; + uint32_t xb_page_found; + uint32_t xb_get_read; /* Version 2 btree counters */ #define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF + __XBTS_MAX) - __uint32_t xs_abtb_2[__XBTS_MAX]; + uint32_t xs_abtb_2[__XBTS_MAX]; #define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2 + __XBTS_MAX) - __uint32_t xs_abtc_2[__XBTS_MAX]; + uint32_t xs_abtc_2[__XBTS_MAX]; #define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2 + __XBTS_MAX) - __uint32_t xs_bmbt_2[__XBTS_MAX]; + uint32_t xs_bmbt_2[__XBTS_MAX]; #define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2 + __XBTS_MAX) - __uint32_t xs_ibt_2[__XBTS_MAX]; + uint32_t xs_ibt_2[__XBTS_MAX]; #define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2 + __XBTS_MAX) - __uint32_t xs_fibt_2[__XBTS_MAX]; + uint32_t xs_fibt_2[__XBTS_MAX]; #define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2 + __XBTS_MAX) - __uint32_t xs_rmap_2[__XBTS_MAX]; + uint32_t xs_rmap_2[__XBTS_MAX]; #define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + __XBTS_MAX) - __uint32_t xs_refcbt_2[__XBTS_MAX]; + uint32_t xs_refcbt_2[__XBTS_MAX]; #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6) - __uint32_t xs_qm_dqreclaims; - __uint32_t xs_qm_dqreclaim_misses; - __uint32_t xs_qm_dquot_dups; - __uint32_t xs_qm_dqcachemisses; - __uint32_t xs_qm_dqcachehits; - __uint32_t xs_qm_dqwants; + uint32_t xs_qm_dqreclaims; + uint32_t xs_qm_dqreclaim_misses; + uint32_t xs_qm_dquot_dups; + uint32_t xs_qm_dqcachemisses; + uint32_t xs_qm_dqcachehits; + uint32_t xs_qm_dqwants; #define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2) - __uint32_t xs_qm_dquot; - __uint32_t xs_qm_dquot_unused; + uint32_t xs_qm_dquot; + uint32_t xs_qm_dquot_unused; /* Extra precision counters */ - __uint64_t xs_xstrat_bytes; - __uint64_t xs_write_bytes; - __uint64_t xs_read_bytes; + uint64_t xs_xstrat_bytes; + uint64_t xs_write_bytes; + uint64_t xs_read_bytes; }; struct xfsstats { @@ -186,7 +186,7 @@ struct xfsstats { * simple wrapper for getting the array index of s struct member offset */ #define XFS_STATS_CALC_INDEX(member) \ - (offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t)) + (offsetof(struct __xfsstats, member) / (int)sizeof(uint32_t)) int xfs_stats_format(struct xfsstats __percpu *stats, char *buf); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index eecbaac08eba..38aaacdbb8b3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -52,6 +52,7 @@ #include "xfs_reflink.h" #include <linux/namei.h> +#include <linux/dax.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/mount.h> @@ -195,7 +196,7 @@ xfs_parseargs( int dsunit = 0; int dswidth = 0; int iosize = 0; - __uint8_t iosizelog = 0; + uint8_t iosizelog = 0; /* * set up the mount name first so all the errors will refer to the @@ -555,7 +556,7 @@ xfs_showargs( return 0; } -static __uint64_t +static uint64_t xfs_max_file_offset( unsigned int blockshift) { @@ -586,7 +587,7 @@ xfs_max_file_offset( # endif #endif - return (((__uint64_t)pagefactor) << bitshift) - 1; + return (((uint64_t)pagefactor) << bitshift) - 1; } /* @@ -621,7 +622,7 @@ xfs_set_inode_alloc( * the max inode percentage. Used only for inode32. */ if (mp->m_maxicount) { - __uint64_t icount; + uint64_t icount; icount = sbp->sb_dblocks * sbp->sb_imax_pct; do_div(icount, 100); @@ -877,8 +878,15 @@ xfs_init_mount_workqueues( if (!mp->m_eofblocks_workqueue) goto out_destroy_log; + mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, + mp->m_fsname); + if (!mp->m_sync_workqueue) + goto out_destroy_eofb; + return 0; +out_destroy_eofb: + destroy_workqueue(mp->m_eofblocks_workqueue); out_destroy_log: destroy_workqueue(mp->m_log_workqueue); out_destroy_reclaim: @@ -899,6 +907,7 @@ STATIC void xfs_destroy_mount_workqueues( struct xfs_mount *mp) { + destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_eofblocks_workqueue); destroy_workqueue(mp->m_log_workqueue); destroy_workqueue(mp->m_reclaim_workqueue); @@ -953,7 +962,7 @@ xfs_fs_destroy_inode( XFS_STATS_INC(ip->i_mount, vn_remove); if (xfs_is_reflink_inode(ip)) { - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) xfs_warn(ip->i_mount, "Error %d while evicting CoW blocks for inode %llu.", @@ -1079,12 +1088,12 @@ xfs_fs_statfs( struct xfs_mount *mp = XFS_M(dentry->d_sb); xfs_sb_t *sbp = &mp->m_sb; struct xfs_inode *ip = XFS_I(d_inode(dentry)); - __uint64_t fakeinos, id; - __uint64_t icount; - __uint64_t ifree; - __uint64_t fdblocks; + uint64_t fakeinos, id; + uint64_t icount; + uint64_t ifree; + uint64_t fdblocks; xfs_extlen_t lsize; - __int64_t ffree; + int64_t ffree; statp->f_type = XFS_SB_MAGIC; statp->f_namelen = MAXNAMELEN - 1; @@ -1107,7 +1116,7 @@ xfs_fs_statfs( statp->f_bavail = statp->f_bfree; fakeinos = statp->f_bfree << sbp->sb_inopblog; - statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); + statp->f_files = MIN(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); if (mp->m_maxicount) statp->f_files = min_t(typeof(statp->f_files), statp->f_files, @@ -1120,7 +1129,7 @@ xfs_fs_statfs( /* make sure statp->f_ffree does not underflow */ ffree = statp->f_files - (icount - ifree); - statp->f_ffree = max_t(__int64_t, ffree, 0); + statp->f_ffree = max_t(int64_t, ffree, 0); if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && @@ -1133,7 +1142,7 @@ xfs_fs_statfs( STATIC void xfs_save_resvblks(struct xfs_mount *mp) { - __uint64_t resblks = 0; + uint64_t resblks = 0; mp->m_resblks_save = mp->m_resblks; xfs_reserve_blocks(mp, &resblks, NULL); @@ -1142,7 +1151,7 @@ xfs_save_resvblks(struct xfs_mount *mp) STATIC void xfs_restore_resvblks(struct xfs_mount *mp) { - __uint64_t resblks; + uint64_t resblks; if (mp->m_resblks_save) { resblks = mp->m_resblks_save; @@ -1757,7 +1766,8 @@ STATIC int __init xfs_init_zones(void) { xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, - offsetof(struct xfs_ioend, io_inline_bio)); + offsetof(struct xfs_ioend, io_inline_bio), + BIOSET_NEED_BVECS); if (!xfs_ioend_bioset) goto out; @@ -1956,12 +1966,20 @@ xfs_init_workqueues(void) if (!xfs_alloc_wq) return -ENOMEM; + xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0); + if (!xfs_discard_wq) + goto out_free_alloc_wq; + return 0; +out_free_alloc_wq: + destroy_workqueue(xfs_alloc_wq); + return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { + destroy_workqueue(xfs_discard_wq); destroy_workqueue(xfs_alloc_wq); } diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index b6418abd85ad..5f2f32408011 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -73,6 +73,8 @@ extern const struct quotactl_ops xfs_quotactl_operations; extern void xfs_reinit_percpu_counters(struct xfs_mount *mp); +extern struct workqueue_struct *xfs_discard_wq; + #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) #endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index f2cb45ed1d54..23a50d7aa46a 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -43,8 +43,8 @@ #include "xfs_log.h" /* ----- Kernel only functions below ----- */ -STATIC int -xfs_readlink_bmap( +int +xfs_readlink_bmap_ilocked( struct xfs_inode *ip, char *link) { @@ -61,6 +61,8 @@ xfs_readlink_bmap( int fsblocks = 0; int offset; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + fsblocks = xfs_symlink_blocks(mp, pathlen); error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0); if (error) @@ -143,7 +145,7 @@ xfs_readlink( if (!pathlen) goto out; - if (pathlen < 0 || pathlen > MAXPATHLEN) { + if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) { xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)", __func__, (unsigned long long) ip->i_ino, (long long) pathlen); @@ -153,7 +155,7 @@ xfs_readlink( } - error = xfs_readlink_bmap(ip, link); + error = xfs_readlink_bmap_ilocked(ip, link); out: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -202,7 +204,7 @@ xfs_symlink( * Check component lengths of the target path name. */ pathlen = strlen(target_path); - if (pathlen >= MAXPATHLEN) /* total string too long */ + if (pathlen >= XFS_SYMLINK_MAXLEN) /* total string too long */ return -ENAMETOOLONG; udqp = gdqp = NULL; @@ -559,7 +561,7 @@ xfs_inactive_symlink( return 0; } - if (pathlen < 0 || pathlen > MAXPATHLEN) { + if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) { xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", __func__, (unsigned long long)ip->i_ino, pathlen); xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index e75245d09116..aeaee8923617 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -21,6 +21,7 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp); +int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link); int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_inactive_symlink(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index 984a3499cfe3..82afee005140 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -95,6 +95,7 @@ extern xfs_param_t xfs_params; struct xfs_globals { int log_recovery_delay; /* log recovery delay (secs) */ + bool bug_on_assert; /* BUG() the kernel on assert failure */ }; extern struct xfs_globals xfs_globals; diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index de6195e38910..8b2ccc234f36 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -90,15 +90,25 @@ to_mp(struct kobject *kobject) return container_of(kobj, struct xfs_mount, m_kobj); } +static struct attribute *xfs_mp_attrs[] = { + NULL, +}; + +struct kobj_type xfs_mp_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_mp_attrs, +}; + #ifdef DEBUG +/* debug */ STATIC ssize_t -fail_writes_store( +bug_on_assert_store( struct kobject *kobject, const char *buf, size_t count) { - struct xfs_mount *mp = to_mp(kobject); int ret; int val; @@ -107,9 +117,9 @@ fail_writes_store( return ret; if (val == 1) - mp->m_fail_writes = true; + xfs_globals.bug_on_assert = true; else if (val == 0) - mp->m_fail_writes = false; + xfs_globals.bug_on_assert = false; else return -EINVAL; @@ -117,33 +127,13 @@ fail_writes_store( } STATIC ssize_t -fail_writes_show( +bug_on_assert_show( struct kobject *kobject, char *buf) { - struct xfs_mount *mp = to_mp(kobject); - - return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0); + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0); } -XFS_SYSFS_ATTR_RW(fail_writes); - -#endif /* DEBUG */ - -static struct attribute *xfs_mp_attrs[] = { -#ifdef DEBUG - ATTR_LIST(fail_writes), -#endif - NULL, -}; - -struct kobj_type xfs_mp_ktype = { - .release = xfs_sysfs_release, - .sysfs_ops = &xfs_sysfs_ops, - .default_attrs = xfs_mp_attrs, -}; - -#ifdef DEBUG -/* debug */ +XFS_SYSFS_ATTR_RW(bug_on_assert); STATIC ssize_t log_recovery_delay_store( @@ -176,6 +166,7 @@ log_recovery_delay_show( XFS_SYSFS_ATTR_RW(log_recovery_delay); static struct attribute *xfs_dbg_attrs[] = { + ATTR_LIST(bug_on_assert), ATTR_LIST(log_recovery_delay), NULL, }; @@ -314,47 +305,11 @@ write_grant_head_show( } XFS_SYSFS_ATTR_RO(write_grant_head); -#ifdef DEBUG -STATIC ssize_t -log_badcrc_factor_store( - struct kobject *kobject, - const char *buf, - size_t count) -{ - struct xlog *log = to_xlog(kobject); - int ret; - uint32_t val; - - ret = kstrtouint(buf, 0, &val); - if (ret) - return ret; - - log->l_badcrc_factor = val; - - return count; -} - -STATIC ssize_t -log_badcrc_factor_show( - struct kobject *kobject, - char *buf) -{ - struct xlog *log = to_xlog(kobject); - - return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor); -} - -XFS_SYSFS_ATTR_RW(log_badcrc_factor); -#endif /* DEBUG */ - static struct attribute *xfs_log_attrs[] = { ATTR_LIST(log_head_lsn), ATTR_LIST(log_tail_lsn), ATTR_LIST(reserve_grant_head), ATTR_LIST(write_grant_head), -#ifdef DEBUG - ATTR_LIST(log_badcrc_factor), -#endif NULL, }; diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 7f17ae6d709a..5d95fe348294 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -47,6 +47,7 @@ #include "xfs_inode_item.h" #include "xfs_bmap_btree.h" #include "xfs_filestream.h" +#include "xfs_fsmap.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 69c5bcd9a51b..bcc3cdf8e1c5 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -40,6 +40,8 @@ struct xfs_inode_log_format; struct xfs_bmbt_irec; struct xfs_btree_cur; struct xfs_refcount_irec; +struct xfs_fsmap; +struct xfs_rmap_irec; DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), @@ -249,7 +251,7 @@ TRACE_EVENT(xfs_iext_insert, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), (long)__entry->idx, __entry->startoff, - (__int64_t)__entry->startblock, + (int64_t)__entry->startblock, __entry->blockcount, __entry->state, (char *)__entry->caller_ip) @@ -293,7 +295,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), (long)__entry->idx, __entry->startoff, - (__int64_t)__entry->startblock, + (int64_t)__entry->startblock, __entry->blockcount, __entry->state, (char *)__entry->caller_ip) @@ -365,6 +367,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); +DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); @@ -687,7 +690,7 @@ DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); -DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); +DEFINE_INODE_EVENT(xfs_filemap_huge_fault); DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); @@ -1278,7 +1281,7 @@ DECLARE_EVENT_CLASS(xfs_imap_class, __entry->count, __print_symbolic(__entry->type, XFS_IO_TYPES), __entry->startoff, - (__int64_t)__entry->startblock, + (int64_t)__entry->startblock, __entry->blockcount) ) @@ -1488,25 +1491,6 @@ TRACE_EVENT(xfs_extent_busy_trim, __entry->tlen) ); -TRACE_EVENT(xfs_trans_commit_lsn, - TP_PROTO(struct xfs_trans *trans), - TP_ARGS(trans), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(struct xfs_trans *, tp) - __field(xfs_lsn_t, lsn) - ), - TP_fast_assign( - __entry->dev = trans->t_mountp->m_super->s_dev; - __entry->tp = trans; - __entry->lsn = trans->t_commit_lsn; - ), - TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tp, - __entry->lsn) -); - TRACE_EVENT(xfs_agf, TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, unsigned long caller_ip), @@ -2055,7 +2039,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, TP_ARGS(log, buf_f), TP_STRUCT__entry( __field(dev_t, dev) - __field(__int64_t, blkno) + __field(int64_t, blkno) __field(unsigned short, len) __field(unsigned short, flags) __field(unsigned short, size) @@ -2104,7 +2088,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, __field(int, fields) __field(unsigned short, asize) __field(unsigned short, dsize) - __field(__int64_t, blkno) + __field(int64_t, blkno) __field(int, len) __field(int, boffset) ), @@ -2190,7 +2174,7 @@ DECLARE_EVENT_CLASS(xfs_discard_class, __entry->agbno = agbno; __entry->len = len; ), - TP_printk("dev %d:%d agno %u agbno %u len %u\n", + TP_printk("dev %d:%d agno %u agbno %u len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2245,7 +2229,6 @@ DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); /* deferred ops */ struct xfs_defer_pending; -struct xfs_defer_intake; struct xfs_defer_ops; DECLARE_EVENT_CLASS(xfs_defer_class, @@ -2254,8 +2237,8 @@ DECLARE_EVENT_CLASS(xfs_defer_class, TP_STRUCT__entry( __field(dev_t, dev) __field(void *, dop) - __field(bool, committed) - __field(bool, low) + __field(char, committed) + __field(char, low) ), TP_fast_assign( __entry->dev = mp ? mp->m_super->s_dev : 0; @@ -2263,7 +2246,7 @@ DECLARE_EVENT_CLASS(xfs_defer_class, __entry->committed = dop->dop_committed; __entry->low = dop->dop_low; ), - TP_printk("dev %d:%d ops %p committed %d low %d\n", + TP_printk("dev %d:%d ops %p committed %d low %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dop, __entry->committed, @@ -2280,8 +2263,8 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class, TP_STRUCT__entry( __field(dev_t, dev) __field(void *, dop) - __field(bool, committed) - __field(bool, low) + __field(char, committed) + __field(char, low) __field(int, error) ), TP_fast_assign( @@ -2291,7 +2274,7 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class, __entry->low = dop->dop_low; __entry->error = error; ), - TP_printk("dev %d:%d ops %p committed %d low %d err %d\n", + TP_printk("dev %d:%d ops %p committed %d low %d err %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dop, __entry->committed, @@ -2310,7 +2293,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class, __field(dev_t, dev) __field(int, type) __field(void *, intent) - __field(bool, committed) + __field(char, committed) __field(int, nr) ), TP_fast_assign( @@ -2320,7 +2303,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class, __entry->committed = dfp->dfp_done != NULL; __entry->nr = dfp->dfp_count; ), - TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", + TP_printk("dev %d:%d optype %d intent %p committed %d nr %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->intent, @@ -2615,7 +2598,8 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class, __entry->asked = r ? r->ar_asked : 0; __entry->len = len; ), - TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n", + TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u " + "resv %u ask %u len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->resv, @@ -2668,7 +2652,7 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class, __entry->agbno = agbno; __entry->dir = dir; ), - TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)\n", + TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2701,7 +2685,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u\n", + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startblock, @@ -2736,7 +2720,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u\n", + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startblock, @@ -2777,7 +2761,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, __entry->i2_refcount = i2->rc_refcount; ), TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u\n", + "agbno %u len %u refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -2823,7 +2807,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, __entry->agbno = agbno; ), TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u @ agbno %u\n", + "agbno %u len %u refcount %u @ agbno %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -2876,7 +2860,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, ), TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " "agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u\n", + "agbno %u len %u refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -3002,31 +2986,6 @@ DEFINE_EVENT(xfs_inode_error_class, name, \ unsigned long caller_ip), \ TP_ARGS(ip, error, caller_ip)) -/* reflink allocator */ -TRACE_EVENT(xfs_bmap_remap_alloc, - TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno, - xfs_extlen_t len), - TP_ARGS(ip, fsbno, len), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsblock_t, fsbno) - __field(xfs_extlen_t, len) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->fsbno = fsbno; - __entry->len = len; - ), - TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->fsbno, - __entry->len) -); -DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error); - /* reflink tracepoint classes */ /* two-file io tracepoint class */ @@ -3089,6 +3048,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, __field(xfs_fileoff_t, lblk) __field(xfs_extlen_t, len) __field(xfs_fsblock_t, pblk) + __field(int, state) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; @@ -3096,13 +3056,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, __entry->lblk = irec->br_startoff; __entry->len = irec->br_blockcount; __entry->pblk = irec->br_startblock; + __entry->state = irec->br_state; ), - TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu", + TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->lblk, __entry->len, - __entry->pblk) + __entry->pblk, + __entry->state) ); #define DEFINE_INODE_IREC_EVENT(name) \ DEFINE_EVENT(xfs_inode_irec_class, name, \ @@ -3225,7 +3187,7 @@ TRACE_EVENT(xfs_ioctl_clone, ), TP_printk("dev %d:%d " "ino 0x%lx isize 0x%llx -> " - "ino 0x%lx isize 0x%llx\n", + "ino 0x%lx isize 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->src_ino, __entry->src_isize, @@ -3242,11 +3204,11 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); +DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); DEFINE_RW_EVENT(xfs_reflink_reserve_cow); -DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); -DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); +DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); @@ -3254,7 +3216,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); -DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); @@ -3266,6 +3227,88 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap); DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece); DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error); +/* fsmap traces */ +DECLARE_EVENT_CLASS(xfs_fsmap_class, + TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, + struct xfs_rmap_irec *rmap), + TP_ARGS(mp, keydev, agno, rmap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, keydev) + __field(xfs_agnumber_t, agno) + __field(xfs_fsblock_t, bno) + __field(xfs_filblks_t, len) + __field(uint64_t, owner) + __field(uint64_t, offset) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->keydev = new_decode_dev(keydev); + __entry->agno = agno; + __entry->bno = rmap->rm_startblock; + __entry->len = rmap->rm_blockcount; + __entry->owner = rmap->rm_owner; + __entry->offset = rmap->rm_offset; + __entry->flags = rmap->rm_flags; + ), + TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->keydev), MINOR(__entry->keydev), + __entry->agno, + __entry->bno, + __entry->len, + __entry->owner, + __entry->offset, + __entry->flags) +) +#define DEFINE_FSMAP_EVENT(name) \ +DEFINE_EVENT(xfs_fsmap_class, name, \ + TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \ + struct xfs_rmap_irec *rmap), \ + TP_ARGS(mp, keydev, agno, rmap)) +DEFINE_FSMAP_EVENT(xfs_fsmap_low_key); +DEFINE_FSMAP_EVENT(xfs_fsmap_high_key); +DEFINE_FSMAP_EVENT(xfs_fsmap_mapping); + +DECLARE_EVENT_CLASS(xfs_getfsmap_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), + TP_ARGS(mp, fsmap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, keydev) + __field(xfs_daddr_t, block) + __field(xfs_daddr_t, len) + __field(uint64_t, owner) + __field(uint64_t, offset) + __field(uint64_t, flags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->keydev = new_decode_dev(fsmap->fmr_device); + __entry->block = fsmap->fmr_physical; + __entry->len = fsmap->fmr_length; + __entry->owner = fsmap->fmr_owner; + __entry->offset = fsmap->fmr_offset; + __entry->flags = fsmap->fmr_flags; + ), + TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->keydev), MINOR(__entry->keydev), + __entry->block, + __entry->len, + __entry->owner, + __entry->offset, + __entry->flags) +) +#define DEFINE_GETFSMAP_EVENT(name) \ +DEFINE_EVENT(xfs_getfsmap_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), \ + TP_ARGS(mp, fsmap)) +DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); +DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); +DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 70f42ea86dfb..2011620008de 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -134,7 +134,7 @@ xfs_trans_reserve( bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; /* Mark this thread as being in a transaction */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); /* * Attempt to reserve the needed disk blocks by decrementing @@ -144,7 +144,7 @@ xfs_trans_reserve( if (blocks > 0) { error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd); if (error != 0) { - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); return -ENOSPC; } tp->t_blk_res += blocks; @@ -221,7 +221,7 @@ undo_blocks: tp->t_blk_res = 0; } - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); return error; } @@ -263,6 +263,28 @@ xfs_trans_alloc( } /* + * Create an empty transaction with no reservation. This is a defensive + * mechanism for routines that query metadata without actually modifying + * them -- if the metadata being queried is somehow cross-linked (think a + * btree block pointer that points higher in the tree), we risk deadlock. + * However, blocks grabbed as part of a transaction can be re-grabbed. + * The verifiers will notice the corrupt block and the operation will fail + * back to userspace without deadlocking. + * + * Note the zero-length reservation; this transaction MUST be cancelled + * without any dirty data. + */ +int +xfs_trans_alloc_empty( + struct xfs_mount *mp, + struct xfs_trans **tpp) +{ + struct xfs_trans_res resv = {0}; + + return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); +} + +/* * Record the indicated change to the given field for application * to the file system's superblock when the transaction commits. * For now, just store the change in the transaction structure. @@ -914,7 +936,7 @@ __xfs_trans_commit( xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free(tp); /* @@ -944,7 +966,7 @@ out_unreserve: if (commit_lsn == -1 && !error) error = -EIO; } - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); xfs_trans_free(tp); @@ -998,7 +1020,7 @@ xfs_trans_cancel( xfs_log_done(mp, tp->t_ticket, NULL, false); /* mark this thread as no longer being in a transaction */ - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); xfs_trans_free(tp); @@ -1012,17 +1034,14 @@ xfs_trans_cancel( * chunk we've been working on and get a new transaction to continue. */ int -__xfs_trans_roll( +xfs_trans_roll( struct xfs_trans **tpp, - struct xfs_inode *dp, - int *committed) + struct xfs_inode *dp) { struct xfs_trans *trans; struct xfs_trans_res tres; int error; - *committed = 0; - /* * Ensure that the inode is always logged. */ @@ -1048,7 +1067,6 @@ __xfs_trans_roll( if (error) return error; - *committed = 1; trans = *tpp; /* @@ -1071,12 +1089,3 @@ __xfs_trans_roll( xfs_trans_ijoin(trans, dp, 0); return 0; } - -int -xfs_trans_roll( - struct xfs_trans **tpp, - struct xfs_inode *dp) -{ - int committed; - return __xfs_trans_roll(tpp, dp, &committed); -} diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 61b7fbdd3ebd..6bdad6f58934 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -32,7 +32,6 @@ struct xfs_mount; struct xfs_trans; struct xfs_trans_res; struct xfs_dquot_acct; -struct xfs_busy_extent; struct xfs_rud_log_item; struct xfs_rui_log_item; struct xfs_btree_cur; @@ -106,10 +105,6 @@ typedef struct xfs_trans { unsigned int t_rtx_res; /* # of rt extents resvd */ unsigned int t_rtx_res_used; /* # of resvd rt extents used */ struct xlog_ticket *t_ticket; /* log mgr ticket */ - xfs_lsn_t t_lsn; /* log seq num of start of - * transaction. */ - xfs_lsn_t t_commit_lsn; /* log seq num of end of - * transaction. */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ unsigned int t_flags; /* misc flags */ @@ -159,6 +154,8 @@ typedef struct xfs_trans { int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, uint blocks, uint rtextents, uint flags, struct xfs_trans **tpp); +int xfs_trans_alloc_empty(struct xfs_mount *mp, + struct xfs_trans **tpp); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, @@ -227,7 +224,6 @@ int xfs_trans_free_extent(struct xfs_trans *, struct xfs_efd_log_item *, xfs_fsblock_t, xfs_extlen_t, struct xfs_owner_info *); int xfs_trans_commit(struct xfs_trans *); -int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); void xfs_trans_cancel(xfs_trans_t *); int xfs_trans_ail_init(struct xfs_mount *); @@ -249,7 +245,7 @@ struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp, struct xfs_rui_log_item *ruip); int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp, struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type, - __uint64_t owner, int whichfork, xfs_fileoff_t startoff, + uint64_t owner, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, xfs_filblks_t blockcount, xfs_exntst_t state, struct xfs_btree_cur **pcur); @@ -275,6 +271,6 @@ int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops, enum xfs_bmap_intent_type type, struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, xfs_exntst_t state); + xfs_filblks_t *blockcount, xfs_exntst_t state); #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d6c9c3e9e02b..9056c0f34a3c 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -684,8 +684,23 @@ xfs_trans_ail_update_bulk( } } -/* - * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL +bool +xfs_ail_delete_one( + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_log_item *mlip = xfs_ail_min(ailp); + + trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); + xfs_ail_delete(ailp, lip); + lip->li_flags &= ~XFS_LI_IN_AIL; + lip->li_lsn = 0; + + return mlip == lip; +} + +/** + * Remove a log items from the AIL * * @xfs_trans_ail_delete_bulk takes an array of log items that all need to * removed from the AIL. The caller is already holding the AIL lock, and done @@ -706,52 +721,36 @@ xfs_trans_ail_update_bulk( * before returning. */ void -xfs_trans_ail_delete_bulk( +xfs_trans_ail_delete( struct xfs_ail *ailp, - struct xfs_log_item **log_items, - int nr_items, + struct xfs_log_item *lip, int shutdown_type) __releases(ailp->xa_lock) { - xfs_log_item_t *mlip; - int mlip_changed = 0; - int i; + struct xfs_mount *mp = ailp->xa_mount; + bool mlip_changed; - mlip = xfs_ail_min(ailp); - - for (i = 0; i < nr_items; i++) { - struct xfs_log_item *lip = log_items[i]; - if (!(lip->li_flags & XFS_LI_IN_AIL)) { - struct xfs_mount *mp = ailp->xa_mount; - - spin_unlock(&ailp->xa_lock); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_alert_tag(mp, XFS_PTAG_AILDELETE, - "%s: attempting to delete a log item that is not in the AIL", - __func__); - xfs_force_shutdown(mp, shutdown_type); - } - return; + if (!(lip->li_flags & XFS_LI_IN_AIL)) { + spin_unlock(&ailp->xa_lock); + if (!XFS_FORCED_SHUTDOWN(mp)) { + xfs_alert_tag(mp, XFS_PTAG_AILDELETE, + "%s: attempting to delete a log item that is not in the AIL", + __func__); + xfs_force_shutdown(mp, shutdown_type); } - - trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); - xfs_ail_delete(ailp, lip); - lip->li_flags &= ~XFS_LI_IN_AIL; - lip->li_lsn = 0; - if (mlip == lip) - mlip_changed = 1; + return; } + mlip_changed = xfs_ail_delete_one(ailp, lip); if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - xlog_assign_tail_lsn_locked(ailp->xa_mount); + if (!XFS_FORCED_SHUTDOWN(mp)) + xlog_assign_tail_lsn_locked(mp); if (list_empty(&ailp->xa_ail)) wake_up_all(&ailp->xa_empty); - spin_unlock(&ailp->xa_lock); + } + spin_unlock(&ailp->xa_lock); + if (mlip_changed) xfs_log_space_wake(ailp->xa_mount); - } else { - spin_unlock(&ailp->xa_lock); - } } int diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c index 6408e7d7c08c..14543d93cd4b 100644 --- a/fs/xfs/xfs_trans_bmap.c +++ b/fs/xfs/xfs_trans_bmap.c @@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update( int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t blockcount, + xfs_filblks_t *blockcount, xfs_exntst_t state) { int error; @@ -196,16 +196,23 @@ xfs_bmap_update_finish_item( void **state) { struct xfs_bmap_intent *bmap; + xfs_filblks_t count; int error; bmap = container_of(item, struct xfs_bmap_intent, bi_list); + count = bmap->bi_bmap.br_blockcount; error = xfs_trans_log_finish_bmap_update(tp, done_item, dop, bmap->bi_type, bmap->bi_owner, bmap->bi_whichfork, bmap->bi_bmap.br_startoff, bmap->bi_bmap.br_startblock, - bmap->bi_bmap.br_blockcount, + &count, bmap->bi_bmap.br_state); + if (!error && count > 0) { + ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); + bmap->bi_bmap.br_blockcount = count; + return -EAGAIN; + } kmem_free(bmap); return error; } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8ee29ca132dc..86987d823d76 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -356,6 +356,7 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp) { xfs_buf_log_item_t *bip; + int freed; /* * Default to a normal brelse() call if the tp is NULL. @@ -419,16 +420,22 @@ xfs_trans_brelse(xfs_trans_t *tp, /* * Drop our reference to the buf log item. */ - atomic_dec(&bip->bli_refcount); + freed = atomic_dec_and_test(&bip->bli_refcount); /* - * If the buf item is not tracking data in the log, then - * we must free it before releasing the buffer back to the - * free pool. Before releasing the buffer to the free pool, - * clear the transaction pointer in b_fsprivate2 to dissolve - * its relation to this transaction. + * If the buf item is not tracking data in the log, then we must free it + * before releasing the buffer back to the free pool. + * + * If the fs has shutdown and we dropped the last reference, it may fall + * on us to release a (possibly dirty) bli if it never made it to the + * AIL (e.g., the aborted unpin already happened and didn't release it + * due to our reference). Since we're already shutdown and need xa_lock, + * just force remove from the AIL and release the bli here. */ - if (!xfs_buf_item_dirty(bip)) { + if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { + xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bp); + } else if (!xfs_buf_item_dirty(bip)) { /*** ASSERT(bp->b_pincount == 0); ***/ diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 49931b72da8a..d91706c56c63 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -106,18 +106,9 @@ xfs_trans_ail_update( xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } -void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, - struct xfs_log_item **log_items, int nr_items, - int shutdown_type) - __releases(ailp->xa_lock); -static inline void -xfs_trans_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip, - int shutdown_type) __releases(ailp->xa_lock) -{ - xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type); -} +bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); +void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, + int shutdown_type) __releases(ailp->xa_lock); static inline void xfs_trans_ail_remove( diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c index 9ead064b5e90..9b577beb43d7 100644 --- a/fs/xfs/xfs_trans_rmap.c +++ b/fs/xfs/xfs_trans_rmap.c @@ -96,7 +96,7 @@ xfs_trans_log_finish_rmap_update( struct xfs_trans *tp, struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type, - __uint64_t owner, + uint64_t owner, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, |