aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig13
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/kmem.c33
-rw-r--r--fs/xfs/kmem.h14
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c3
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c174
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h18
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c178
-rw-r--r--fs/xfs/libxfs/xfs_attr.c28
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c13
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h10
-rw-r--r--fs/xfs/libxfs/xfs_bit.h24
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c627
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h16
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c89
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h22
-rw-r--r--fs/xfs/libxfs/xfs_btree.c105
-rw-r--r--fs/xfs/libxfs/xfs_btree.h41
-rw-r--r--fs/xfs/libxfs/xfs_cksum.h16
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c20
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h10
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c28
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h64
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h8
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c18
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c61
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c106
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c7
-rw-r--r--fs/xfs/libxfs/xfs_format.h124
-rw-r--r--fs/xfs/libxfs/xfs_fs.h29
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c56
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h5
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c36
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c9
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h31
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c112
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h256
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h3
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h4
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c59
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h16
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c70
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h15
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c34
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c74
-rw-r--r--fs/xfs/libxfs/xfs_sb.c4
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c4
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h23
-rw-r--r--fs/xfs/libxfs/xfs_types.h46
-rw-r--r--fs/xfs/uuid.c63
-rw-r--r--fs/xfs/uuid.h35
-rw-r--r--fs/xfs/xfs.h4
-rw-r--r--fs/xfs/xfs_acl.c6
-rw-r--r--fs/xfs/xfs_acl.h1
-rw-r--r--fs/xfs/xfs_aops.c119
-rw-r--r--fs/xfs/xfs_attr.h3
-rw-r--r--fs/xfs/xfs_attr_list.c63
-rw-r--r--fs/xfs/xfs_bmap_item.c23
-rw-r--r--fs/xfs/xfs_bmap_util.c283
-rw-r--r--fs/xfs/xfs_bmap_util.h9
-rw-r--r--fs/xfs/xfs_buf.c143
-rw-r--r--fs/xfs/xfs_buf.h9
-rw-r--r--fs/xfs/xfs_buf_item.c22
-rw-r--r--fs/xfs/xfs_dir2_readdir.c345
-rw-r--r--fs/xfs/xfs_discard.c43
-rw-r--r--fs/xfs/xfs_discard.h1
-rw-r--r--fs/xfs/xfs_dquot.c71
-rw-r--r--fs/xfs/xfs_error.c319
-rw-r--r--fs/xfs/xfs_error.h44
-rw-r--r--fs/xfs/xfs_extent_busy.c156
-rw-r--r--fs/xfs/xfs_extent_busy.h11
-rw-r--r--fs/xfs/xfs_extfree_item.c1
-rw-r--r--fs/xfs/xfs_file.c532
-rw-r--r--fs/xfs/xfs_fsmap.c943
-rw-r--r--fs/xfs/xfs_fsmap.h53
-rw-r--r--fs/xfs/xfs_fsops.c55
-rw-r--r--fs/xfs/xfs_fsops.h4
-rw-r--r--fs/xfs/xfs_globals.c5
-rw-r--r--fs/xfs/xfs_icache.c176
-rw-r--r--fs/xfs/xfs_icache.h14
-rw-r--r--fs/xfs/xfs_inode.c84
-rw-r--r--fs/xfs/xfs_inode.h11
-rw-r--r--fs/xfs/xfs_inode_item.c37
-rw-r--r--fs/xfs/xfs_ioctl.c121
-rw-r--r--fs/xfs/xfs_ioctl.h10
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_ioctl32.h6
-rw-r--r--fs/xfs/xfs_iomap.c138
-rw-r--r--fs/xfs/xfs_iomap.h24
-rw-r--r--fs/xfs/xfs_iops.c29
-rw-r--r--fs/xfs/xfs_itable.c10
-rw-r--r--fs/xfs/xfs_itable.h2
-rw-r--r--fs/xfs/xfs_linux.h110
-rw-r--r--fs/xfs/xfs_log.c91
-rw-r--r--fs/xfs/xfs_log.h3
-rw-r--r--fs/xfs/xfs_log_cil.c175
-rw-r--r--fs/xfs/xfs_log_priv.h4
-rw-r--r--fs/xfs/xfs_log_recover.c57
-rw-r--r--fs/xfs/xfs_message.c5
-rw-r--r--fs/xfs/xfs_mount.c82
-rw-r--r--fs/xfs/xfs_mount.h66
-rw-r--r--fs/xfs/xfs_qm.c39
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_quotaops.c1
-rw-r--r--fs/xfs/xfs_refcount_item.c1
-rw-r--r--fs/xfs/xfs_reflink.c426
-rw-r--r--fs/xfs/xfs_reflink.h18
-rw-r--r--fs/xfs/xfs_rmap_item.c1
-rw-r--r--fs/xfs/xfs_rtalloc.c32
-rw-r--r--fs/xfs/xfs_rtalloc.h28
-rw-r--r--fs/xfs/xfs_stats.c8
-rw-r--r--fs/xfs/xfs_stats.h190
-rw-r--r--fs/xfs/xfs_super.c48
-rw-r--r--fs/xfs/xfs_super.h2
-rw-r--r--fs/xfs/xfs_symlink.c14
-rw-r--r--fs/xfs/xfs_symlink.h1
-rw-r--r--fs/xfs/xfs_sysctl.h1
-rw-r--r--fs/xfs/xfs_sysfs.c81
-rw-r--r--fs/xfs/xfs_trace.c1
-rw-r--r--fs/xfs/xfs_trace.h189
-rw-r--r--fs/xfs/xfs_trans.c51
-rw-r--r--fs/xfs/xfs_trans.h12
-rw-r--r--fs/xfs/xfs_trans_ail.c71
-rw-r--r--fs/xfs/xfs_trans_bmap.c11
-rw-r--r--fs/xfs/xfs_trans_buf.c21
-rw-r--r--fs/xfs/xfs_trans_priv.h15
-rw-r--r--fs/xfs/xfs_trans_rmap.c2
134 files changed, 5245 insertions, 3439 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 35faf128f36d..1b98cfa342ab 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -96,3 +96,16 @@ config XFS_DEBUG
not useful unless you are debugging a particular problem.
Say N unless you are an XFS developer, or you play one on TV.
+
+config XFS_ASSERT_FATAL
+ bool "XFS fatal asserts"
+ default y
+ depends on XFS_FS && XFS_DEBUG
+ help
+ Set the default DEBUG mode ASSERT failure behavior.
+
+ Say Y here to cause DEBUG mode ASSERT failures to result in fatal
+ errors that BUG() the kernel by default. If you say N, ASSERT failures
+ result in warnings.
+
+ This behavior can be modified at runtime via sysfs.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 26ef1958b65b..a6e955bfead8 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -79,6 +79,7 @@ xfs-y += xfs_aops.o \
xfs_extent_busy.o \
xfs_file.o \
xfs_filestream.o \
+ xfs_fsmap.o \
xfs_fsops.o \
xfs_globals.o \
xfs_icache.o \
@@ -97,8 +98,7 @@ xfs-y += xfs_aops.o \
xfs_sysfs.o \
xfs_trans.o \
xfs_xattr.o \
- kmem.o \
- uuid.o
+ kmem.o
# low-level transaction/log code
xfs-y += xfs_log.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 339c696bbc01..393b6849aeb3 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -16,6 +16,7 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/mm.h>
+#include <linux/sched/mm.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/swap.h>
@@ -24,24 +25,6 @@
#include "kmem.h"
#include "xfs_message.h"
-/*
- * Greedy allocation. May fail and may return vmalloced memory.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
- void *ptr;
- size_t kmsize = maxsize;
-
- while (!(ptr = vzalloc(kmsize))) {
- if ((kmsize >>= 1) <= minsize)
- kmsize = minsize;
- }
- if (ptr)
- *size = kmsize;
- return ptr;
-}
-
void *
kmem_alloc(size_t size, xfs_km_flags_t flags)
{
@@ -65,7 +48,7 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
void *
kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
{
- unsigned noio_flag = 0;
+ unsigned nofs_flag = 0;
void *ptr;
gfp_t lflags;
@@ -77,17 +60,17 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
* __vmalloc() will allocate data pages and auxillary structures (e.g.
* pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
* here. Hence we need to tell memory reclaim that we are in such a
- * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
+ * context via PF_MEMALLOC_NOFS to prevent memory reclaim re-entering
* the filesystem here and potentially deadlocking.
*/
- if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
- noio_flag = memalloc_noio_save();
+ if (flags & KM_NOFS)
+ nofs_flag = memalloc_nofs_save();
lflags = kmem_flags_convert(flags);
- ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+ ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL);
- if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
- memalloc_noio_restore(noio_flag);
+ if (flags & KM_NOFS)
+ memalloc_nofs_restore(nofs_flag);
return ptr;
}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 689f746224e7..4d85992d75b2 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -50,10 +50,20 @@ kmem_flags_convert(xfs_km_flags_t flags)
lflags = GFP_ATOMIC | __GFP_NOWARN;
} else {
lflags = GFP_KERNEL | __GFP_NOWARN;
- if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+ if (flags & KM_NOFS)
lflags &= ~__GFP_FS;
}
+ /*
+ * Default page/slab allocator behavior is to retry for ever
+ * for small allocations. We can override this behavior by using
+ * __GFP_RETRY_MAYFAIL which will tell the allocator to retry as long
+ * as it is feasible but rather fail than retry forever for all
+ * request sizes.
+ */
+ if (flags & KM_MAYFAIL)
+ lflags |= __GFP_RETRY_MAYFAIL;
+
if (flags & KM_ZERO)
lflags |= __GFP_ZERO;
@@ -69,8 +79,6 @@ static inline void kmem_free(const void *ptr)
}
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
static inline void *
kmem_zalloc(size_t size, xfs_km_flags_t flags)
{
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 33db69be4832..b008ff3250eb 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -111,8 +111,7 @@ xfs_ag_resv_critical(
/* Critically low if less than 10% or max btree height remains. */
return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
- pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL,
- XFS_RANDOM_AG_RESV_CRITICAL);
+ pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
}
/*
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9f06a211e157..744dcaec34cc 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -221,20 +221,22 @@ xfs_alloc_get_rec(
* Compute aligned version of the found extent.
* Takes alignment and min length into account.
*/
-STATIC void
+STATIC bool
xfs_alloc_compute_aligned(
xfs_alloc_arg_t *args, /* allocation argument structure */
xfs_agblock_t foundbno, /* starting block in found extent */
xfs_extlen_t foundlen, /* length in found extent */
xfs_agblock_t *resbno, /* result block number */
- xfs_extlen_t *reslen) /* result length */
+ xfs_extlen_t *reslen, /* result length */
+ unsigned *busy_gen)
{
- xfs_agblock_t bno;
- xfs_extlen_t len;
+ xfs_agblock_t bno = foundbno;
+ xfs_extlen_t len = foundlen;
xfs_extlen_t diff;
+ bool busy;
/* Trim busy sections out of found extent */
- xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+ busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
/*
* If we have a largish extent that happens to start before min_agbno,
@@ -259,6 +261,8 @@ xfs_alloc_compute_aligned(
*resbno = bno;
*reslen = len;
}
+
+ return busy;
}
/*
@@ -602,7 +606,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
/*
* Read in the allocation group free block array.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_read_agfl(
xfs_mount_t *mp, /* mount point structure */
xfs_trans_t *tp, /* transaction pointer */
@@ -737,10 +741,11 @@ xfs_alloc_ag_vextent_exact(
int error;
xfs_agblock_t fbno; /* start block of found extent */
xfs_extlen_t flen; /* length of found extent */
- xfs_agblock_t tbno; /* start block of trimmed extent */
- xfs_extlen_t tlen; /* length of trimmed extent */
- xfs_agblock_t tend; /* end block of trimmed extent */
+ xfs_agblock_t tbno; /* start block of busy extent */
+ xfs_extlen_t tlen; /* length of busy extent */
+ xfs_agblock_t tend; /* end block of busy extent */
int i; /* success/failure of operation */
+ unsigned busy_gen;
ASSERT(args->alignment == 1);
@@ -773,7 +778,9 @@ xfs_alloc_ag_vextent_exact(
/*
* Check for overlapping busy extents.
*/
- xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+ tbno = fbno;
+ tlen = flen;
+ xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
/*
* Give up if the start of the extent is busy, or the freespace isn't
@@ -853,6 +860,7 @@ xfs_alloc_find_best_extent(
xfs_agblock_t sdiff;
int error;
int i;
+ unsigned busy_gen;
/* The good extent is perfect, no need to search. */
if (!gdiff)
@@ -866,7 +874,8 @@ xfs_alloc_find_best_extent(
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
+ xfs_alloc_compute_aligned(args, *sbno, *slen,
+ sbnoa, slena, &busy_gen);
/*
* The good extent is closer than this one.
@@ -955,7 +964,8 @@ xfs_alloc_ag_vextent_near(
xfs_extlen_t ltlena; /* aligned ... */
xfs_agblock_t ltnew; /* useful start bno of left side */
xfs_extlen_t rlen; /* length of returned extent */
- int forced = 0;
+ bool busy;
+ unsigned busy_gen;
#ifdef DEBUG
/*
* Randomly don't execute the first algorithm.
@@ -982,6 +992,7 @@ restart:
ltlen = 0;
gtlena = 0;
ltlena = 0;
+ busy = false;
/*
* Get a cursor for the by-size btree.
@@ -1064,8 +1075,8 @@ restart:
if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, ltbno, ltlen,
- &ltbnoa, &ltlena);
+ busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
+ &ltbnoa, &ltlena, &busy_gen);
if (ltlena < args->minlen)
continue;
if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
@@ -1183,8 +1194,8 @@ restart:
if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, ltbno, ltlen,
- &ltbnoa, &ltlena);
+ busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
+ &ltbnoa, &ltlena, &busy_gen);
if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
break;
if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1199,8 +1210,8 @@ restart:
if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, gtbno, gtlen,
- &gtbnoa, &gtlena);
+ busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
+ &gtbnoa, &gtlena, &busy_gen);
if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
break;
if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1261,9 +1272,9 @@ restart:
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- if (!forced++) {
+ if (busy) {
trace_xfs_alloc_near_busy(args);
- xfs_log_force(args->mp, XFS_LOG_SYNC);
+ xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
goto restart;
}
trace_xfs_alloc_size_neither(args);
@@ -1344,7 +1355,8 @@ xfs_alloc_ag_vextent_size(
int i; /* temp status variable */
xfs_agblock_t rbno; /* returned block number */
xfs_extlen_t rlen; /* length of returned extent */
- int forced = 0;
+ bool busy;
+ unsigned busy_gen;
restart:
/*
@@ -1353,6 +1365,7 @@ restart:
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
args->agno, XFS_BTNUM_CNT);
bno_cur = NULL;
+ busy = false;
/*
* Look for an entry >= maxlen+alignment-1 blocks.
@@ -1362,14 +1375,13 @@ restart:
goto error0;
/*
- * If none or we have busy extents that we cannot allocate from, then
- * we have to settle for a smaller extent. In the case that there are
- * no large extents, this will return the last entry in the tree unless
- * the tree is empty. In the case that there are only busy large
- * extents, this will return the largest small extent unless there
+ * If none then we have to settle for a smaller extent. In the case that
+ * there are no large extents, this will return the last entry in the
+ * tree unless the tree is empty. In the case that there are only busy
+ * large extents, this will return the largest small extent unless there
* are no smaller extents available.
*/
- if (!i || forced > 1) {
+ if (!i) {
error = xfs_alloc_ag_vextent_small(args, cnt_cur,
&fbno, &flen, &i);
if (error)
@@ -1380,13 +1392,11 @@ restart:
return 0;
}
ASSERT(i == 1);
- xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+ busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
+ &rlen, &busy_gen);
} else {
/*
* Search for a non-busy extent that is large enough.
- * If we are at low space, don't check, or if we fall of
- * the end of the btree, turn off the busy check and
- * restart.
*/
for (;;) {
error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
@@ -1394,8 +1404,8 @@ restart:
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, fbno, flen,
- &rbno, &rlen);
+ busy = xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen, &busy_gen);
if (rlen >= args->maxlen)
break;
@@ -1407,18 +1417,13 @@ restart:
/*
* Our only valid extents must have been busy.
* Make it unbusy by forcing the log out and
- * retrying. If we've been here before, forcing
- * the log isn't making the extents available,
- * which means they have probably been freed in
- * this transaction. In that case, we have to
- * give up on them and we'll attempt a minlen
- * allocation the next time around.
+ * retrying.
*/
xfs_btree_del_cursor(cnt_cur,
XFS_BTREE_NOERROR);
trace_xfs_alloc_size_busy(args);
- if (!forced++)
- xfs_log_force(args->mp, XFS_LOG_SYNC);
+ xfs_extent_busy_flush(args->mp,
+ args->pag, busy_gen);
goto restart;
}
}
@@ -1454,8 +1459,8 @@ restart:
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
if (flen < bestrlen)
break;
- xfs_alloc_compute_aligned(args, fbno, flen,
- &rbno, &rlen);
+ busy = xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen, &busy_gen);
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
(rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1484,10 +1489,10 @@ restart:
*/
args->len = rlen;
if (rlen < args->minlen) {
- if (!forced++) {
+ if (busy) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
trace_xfs_alloc_size_busy(args);
- xfs_log_force(args->mp, XFS_LOG_SYNC);
+ xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
goto restart;
}
goto out_nominleft;
@@ -2449,8 +2454,7 @@ xfs_agf_read_verify(
!xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
- XFS_ERRTAG_ALLOC_READ_AGF,
- XFS_RANDOM_ALLOC_READ_AGF))
+ XFS_ERRTAG_ALLOC_READ_AGF))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
@@ -2659,21 +2663,11 @@ xfs_alloc_vextent(
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
args->type = XFS_ALLOCTYPE_NEAR_BNO;
/* FALLTHROUGH */
- case XFS_ALLOCTYPE_ANY_AG:
- case XFS_ALLOCTYPE_START_AG:
case XFS_ALLOCTYPE_FIRST_AG:
/*
* Rotate through the allocation groups looking for a winner.
*/
- if (type == XFS_ALLOCTYPE_ANY_AG) {
- /*
- * Start with the last place we left off.
- */
- args->agno = sagno = (mp->m_agfrotor / rotorstep) %
- mp->m_sb.sb_agcount;
- args->type = XFS_ALLOCTYPE_THIS_AG;
- flags = XFS_ALLOC_FLAG_TRYLOCK;
- } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
+ if (type == XFS_ALLOCTYPE_FIRST_AG) {
/*
* Start with allocation group given by bno.
*/
@@ -2682,8 +2676,6 @@ xfs_alloc_vextent(
sagno = 0;
flags = 0;
} else {
- if (type == XFS_ALLOCTYPE_START_AG)
- args->type = XFS_ALLOCTYPE_THIS_AG;
/*
* Start with the given allocation group.
*/
@@ -2751,7 +2743,7 @@ xfs_alloc_vextent(
}
xfs_perag_put(args->pag);
}
- if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
+ if (bump_rotor) {
if (args->agno == sagno)
mp->m_agfrotor = (mp->m_agfrotor + 1) %
(mp->m_sb.sb_agcount * rotorstep);
@@ -2849,8 +2841,7 @@ xfs_free_extent(
ASSERT(type != XFS_AG_RESV_AGFL);
if (XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_FREE_EXTENT,
- XFS_RANDOM_FREE_EXTENT))
+ XFS_ERRTAG_FREE_EXTENT))
return -EIO;
error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
@@ -2875,3 +2866,60 @@ err:
xfs_trans_brelse(tp, agbp);
return error;
}
+
+struct xfs_alloc_query_range_info {
+ xfs_alloc_query_range_fn fn;
+ void *priv;
+};
+
+/* Format btree record and pass to our callback. */
+STATIC int
+xfs_alloc_query_range_helper(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_alloc_query_range_info *query = priv;
+ struct xfs_alloc_rec_incore irec;
+
+ irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
+ irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
+ return query->fn(cur, &irec, query->priv);
+}
+
+/* Find all free space within a given range of blocks. */
+int
+xfs_alloc_query_range(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *low_rec,
+ struct xfs_alloc_rec_incore *high_rec,
+ xfs_alloc_query_range_fn fn,
+ void *priv)
+{
+ union xfs_btree_irec low_brec;
+ union xfs_btree_irec high_brec;
+ struct xfs_alloc_query_range_info query;
+
+ ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+ low_brec.a = *low_rec;
+ high_brec.a = *high_rec;
+ query.priv = priv;
+ query.fn = fn;
+ return xfs_btree_query_range(cur, &low_brec, &high_brec,
+ xfs_alloc_query_range_helper, &query);
+}
+
+/* Find all free space records. */
+int
+xfs_alloc_query_all(
+ struct xfs_btree_cur *cur,
+ xfs_alloc_query_range_fn fn,
+ void *priv)
+{
+ struct xfs_alloc_query_range_info query;
+
+ ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+ query.priv = priv;
+ query.fn = fn;
+ return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 1d0f48a501a3..ef26edc2e938 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -29,9 +29,7 @@ extern struct workqueue_struct *xfs_alloc_wq;
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/
-#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */
#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
-#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */
#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
@@ -41,9 +39,7 @@ extern struct workqueue_struct *xfs_alloc_wq;
typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_TYPES \
- { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
{ XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \
- { XFS_ALLOCTYPE_START_AG, "START_AG" }, \
{ XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \
{ XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \
{ XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \
@@ -217,10 +213,24 @@ xfs_alloc_get_rec(
int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, struct xfs_buf **bpp);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf **agbp);
xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
+typedef int (*xfs_alloc_query_range_fn)(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv);
+
+int xfs_alloc_query_range(struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *low_rec,
+ struct xfs_alloc_rec_incore *high_rec,
+ xfs_alloc_query_range_fn fn, void *priv);
+int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
+ void *priv);
+
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index efb467b10a71..cfde0a0f9706 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -205,19 +205,37 @@ xfs_allocbt_init_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
- ASSERT(rec->alloc.ar_startblock != 0);
-
key->alloc.ar_startblock = rec->alloc.ar_startblock;
key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}
STATIC void
+xfs_bnobt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ __u32 x;
+
+ x = be32_to_cpu(rec->alloc.ar_startblock);
+ x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
+ key->alloc.ar_startblock = cpu_to_be32(x);
+ key->alloc.ar_blockcount = 0;
+}
+
+STATIC void
+xfs_cntbt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
+ key->alloc.ar_startblock = 0;
+}
+
+STATIC void
xfs_allocbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- ASSERT(cur->bc_rec.a.ar_startblock != 0);
-
rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
@@ -235,25 +253,58 @@ xfs_allocbt_init_ptr_from_cur(
ptr->s = agf->agf_roots[cur->bc_btnum];
}
-STATIC __int64_t
-xfs_allocbt_key_diff(
+STATIC int64_t
+xfs_bnobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
xfs_alloc_key_t *kp = &key->alloc;
- __int64_t diff;
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return (__int64_t)be32_to_cpu(kp->ar_startblock) -
- rec->ar_startblock;
- }
+ return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+}
+
+STATIC int64_t
+xfs_cntbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
+ xfs_alloc_key_t *kp = &key->alloc;
+ int64_t diff;
+
+ diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+ if (diff)
+ return diff;
+
+ return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+}
+
+STATIC int64_t
+xfs_bnobt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
+ be32_to_cpu(k2->alloc.ar_startblock);
+}
+
+STATIC int64_t
+xfs_cntbt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ int64_t diff;
- diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+ diff = be32_to_cpu(k1->alloc.ar_blockcount) -
+ be32_to_cpu(k2->alloc.ar_blockcount);
if (diff)
return diff;
- return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+ return be32_to_cpu(k1->alloc.ar_startblock) -
+ be32_to_cpu(k2->alloc.ar_startblock);
}
static bool
@@ -344,46 +395,54 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
-xfs_allocbt_keys_inorder(
+xfs_bnobt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(k1->alloc.ar_blockcount) <
- be32_to_cpu(k2->alloc.ar_blockcount) ||
- (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
- be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock));
- }
+ return be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock);
}
STATIC int
-xfs_allocbt_recs_inorder(
+xfs_bnobt_recs_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(r1->alloc.ar_startblock) +
- be32_to_cpu(r1->alloc.ar_blockcount) <=
- be32_to_cpu(r2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(r1->alloc.ar_blockcount) <
- be32_to_cpu(r2->alloc.ar_blockcount) ||
- (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
- be32_to_cpu(r1->alloc.ar_startblock) <
- be32_to_cpu(r2->alloc.ar_startblock));
- }
+ return be32_to_cpu(r1->alloc.ar_startblock) +
+ be32_to_cpu(r1->alloc.ar_blockcount) <=
+ be32_to_cpu(r2->alloc.ar_startblock);
+}
+
+STATIC int
+xfs_cntbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->alloc.ar_blockcount) <
+ be32_to_cpu(k2->alloc.ar_blockcount) ||
+ (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+ be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock));
+}
+
+STATIC int
+xfs_cntbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return be32_to_cpu(r1->alloc.ar_blockcount) <
+ be32_to_cpu(r2->alloc.ar_blockcount) ||
+ (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+ be32_to_cpu(r1->alloc.ar_startblock) <
+ be32_to_cpu(r2->alloc.ar_startblock));
}
-#endif /* DEBUG */
-static const struct xfs_btree_ops xfs_allocbt_ops = {
+static const struct xfs_btree_ops xfs_bnobt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -395,14 +454,36 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
- .key_diff = xfs_allocbt_key_diff,
+ .key_diff = xfs_bnobt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
- .keys_inorder = xfs_allocbt_keys_inorder,
- .recs_inorder = xfs_allocbt_recs_inorder,
-#endif
+ .diff_two_keys = xfs_bnobt_diff_two_keys,
+ .keys_inorder = xfs_bnobt_keys_inorder,
+ .recs_inorder = xfs_bnobt_recs_inorder,
+};
+
+static const struct xfs_btree_ops xfs_cntbt_ops = {
+ .rec_len = sizeof(xfs_alloc_rec_t),
+ .key_len = sizeof(xfs_alloc_key_t),
+
+ .dup_cursor = xfs_allocbt_dup_cursor,
+ .set_root = xfs_allocbt_set_root,
+ .alloc_block = xfs_allocbt_alloc_block,
+ .free_block = xfs_allocbt_free_block,
+ .update_lastrec = xfs_allocbt_update_lastrec,
+ .get_minrecs = xfs_allocbt_get_minrecs,
+ .get_maxrecs = xfs_allocbt_get_maxrecs,
+ .init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec,
+ .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
+ .key_diff = xfs_cntbt_key_diff,
+ .buf_ops = &xfs_allocbt_buf_ops,
+ .diff_two_keys = xfs_cntbt_diff_two_keys,
+ .keys_inorder = xfs_cntbt_keys_inorder,
+ .recs_inorder = xfs_cntbt_recs_inorder,
};
/*
@@ -427,16 +508,15 @@ xfs_allocbt_init_cursor(
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
- cur->bc_ops = &xfs_allocbt_ops;
- if (btnum == XFS_BTNUM_BNO)
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
- else
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
if (btnum == XFS_BTNUM_CNT) {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
+ cur->bc_ops = &xfs_cntbt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
} else {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
+ cur->bc_ops = &xfs_bnobt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
}
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6622d46ddec3..de7b9bd30bec 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -114,6 +114,25 @@ xfs_inode_hasattr(
* Overall external interface routines.
*========================================================================*/
+/* Retrieve an extended attribute and its value. Must have ilock. */
+int
+xfs_attr_get_ilocked(
+ struct xfs_inode *ip,
+ struct xfs_da_args *args)
+{
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+
+ if (!xfs_inode_hasattr(ip))
+ return -ENOATTR;
+ else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ return xfs_attr_shortform_getvalue(args);
+ else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
+ return xfs_attr_leaf_get(args);
+ else
+ return xfs_attr_node_get(args);
+}
+
+/* Retrieve an extended attribute by name, and its value. */
int
xfs_attr_get(
struct xfs_inode *ip,
@@ -141,14 +160,7 @@ xfs_attr_get(
args.op_flags = XFS_DA_OP_OKNOENT;
lock_mode = xfs_ilock_attr_map_shared(ip);
- if (!xfs_inode_hasattr(ip))
- error = -ENOATTR;
- else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
- error = xfs_attr_shortform_getvalue(&args);
- else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
- error = xfs_attr_leaf_get(&args);
- else
- error = xfs_attr_node_get(&args);
+ error = xfs_attr_get_ilocked(ip, &args);
xfs_iunlock(ip, lock_mode);
*valuelenp = args.valuelen;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 2852521fc8ec..c6c15e5717e4 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -351,7 +351,7 @@ xfs_attr3_leaf_read(
err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
return err;
}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d52f525f5b2d..5236d8e45146 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -253,7 +253,7 @@ xfs_attr_rmtval_copyout(
xfs_ino_t ino,
int *offset,
int *valuelen,
- __uint8_t **dst)
+ uint8_t **dst)
{
char *src = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
@@ -301,7 +301,7 @@ xfs_attr_rmtval_copyin(
xfs_ino_t ino,
int *offset,
int *valuelen,
- __uint8_t **src)
+ uint8_t **src)
{
char *dst = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
@@ -355,7 +355,7 @@ xfs_attr_rmtval_get(
struct xfs_mount *mp = args->dp->i_mount;
struct xfs_buf *bp;
xfs_dablk_t lblkno = args->rmtblkno;
- __uint8_t *dst = args->value;
+ uint8_t *dst = args->value;
int valuelen;
int nmap;
int error;
@@ -386,7 +386,8 @@ xfs_attr_rmtval_get(
(map[i].br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ error = xfs_trans_read_buf(mp, args->trans,
+ mp->m_ddev_targp,
dblkno, dblkcnt, 0, &bp,
&xfs_attr3_rmt_buf_ops);
if (error)
@@ -395,7 +396,7 @@ xfs_attr_rmtval_get(
error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
&offset, &valuelen,
&dst);
- xfs_buf_relse(bp);
+ xfs_trans_brelse(args->trans, bp);
if (error)
return error;
@@ -421,7 +422,7 @@ xfs_attr_rmtval_set(
struct xfs_bmbt_irec map;
xfs_dablk_t lblkno;
xfs_fileoff_t lfileoff = 0;
- __uint8_t *src = args->value;
+ uint8_t *src = args->value;
int blkcnt;
int valuelen;
int nmap;
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 90928bbe693c..afd684ae3136 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -31,10 +31,10 @@ typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
* We generate this then sort it, attr_list() must return things in hash-order.
*/
typedef struct xfs_attr_sf_sort {
- __uint8_t entno; /* entry number in original list */
- __uint8_t namelen; /* length of name value (no null) */
- __uint8_t valuelen; /* length of value */
- __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
+ uint8_t entno; /* entry number in original list */
+ uint8_t namelen; /* length of name value (no null) */
+ uint8_t valuelen; /* length of value */
+ uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
xfs_dahash_t hash; /* this entry's hash value */
unsigned char *name; /* name value, pointer into buffer */
} xfs_attr_sf_sort_t;
@@ -42,7 +42,7 @@ typedef struct xfs_attr_sf_sort {
#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \
(((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \
- ((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
+ ((1 << (NBBY*(int)sizeof(uint8_t))) - 1)
#define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \
((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
#define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \
diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index e1649c0d3e02..61c6b2025d0c 100644
--- a/fs/xfs/libxfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h
@@ -25,47 +25,47 @@
/*
* masks with n high/low bits set, 64-bit values
*/
-static inline __uint64_t xfs_mask64hi(int n)
+static inline uint64_t xfs_mask64hi(int n)
{
- return (__uint64_t)-1 << (64 - (n));
+ return (uint64_t)-1 << (64 - (n));
}
-static inline __uint32_t xfs_mask32lo(int n)
+static inline uint32_t xfs_mask32lo(int n)
{
- return ((__uint32_t)1 << (n)) - 1;
+ return ((uint32_t)1 << (n)) - 1;
}
-static inline __uint64_t xfs_mask64lo(int n)
+static inline uint64_t xfs_mask64lo(int n)
{
- return ((__uint64_t)1 << (n)) - 1;
+ return ((uint64_t)1 << (n)) - 1;
}
/* Get high bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_highbit32(__uint32_t v)
+static inline int xfs_highbit32(uint32_t v)
{
return fls(v) - 1;
}
/* Get high bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_highbit64(__uint64_t v)
+static inline int xfs_highbit64(uint64_t v)
{
return fls64(v) - 1;
}
/* Get low bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_lowbit32(__uint32_t v)
+static inline int xfs_lowbit32(uint32_t v)
{
return ffs(v) - 1;
}
/* Get low bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_lowbit64(__uint64_t v)
+static inline int xfs_lowbit64(uint64_t v)
{
- __uint32_t w = (__uint32_t)v;
+ uint32_t w = (uint32_t)v;
int n = 0;
if (w) { /* lower bits */
n = ffs(w);
} else { /* upper bits */
- w = (__uint32_t)(v >> 32);
+ w = (uint32_t)(v >> 32);
if (w) {
n = ffs(w);
if (n)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index bfc00de5c6f1..0a9880777c9c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -740,15 +740,9 @@ xfs_bmap_extents_to_btree(
* Fill in the root.
*/
block = ifp->if_broot;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
- XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
- XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
- XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
+ xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
+ XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
XFS_BTREE_LONG_PTRS);
-
/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
@@ -769,7 +763,6 @@ xfs_bmap_extents_to_btree(
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
} else if (dfops->dop_low) {
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = *firstblock;
} else {
@@ -785,28 +778,16 @@ try_another_ag:
return error;
}
- /*
- * During a CoW operation, the allocation and bmbt updates occur in
- * different transactions. The mapping code tries to put new bmbt
- * blocks near extents being mapped, but the only way to guarantee this
- * is if the alloc and the mapping happen in a single transaction that
- * has a block reservation. That isn't the case here, so if we run out
- * of space we'll try again with another AG.
- */
- if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
- args.fsbno == NULLFSBLOCK &&
- args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- dfops->dop_low = true;
- goto try_another_ag;
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+ xfs_iroot_realloc(ip, -1, whichfork);
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return -ENOSPC;
}
/*
* Allocation can't fail, the space was reserved.
*/
- ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(*firstblock == NULLFSBLOCK ||
- args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
- (dfops->dop_low &&
- args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+ args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
@@ -817,13 +798,8 @@ try_another_ag:
*/
abp->b_ops = &xfs_bmbt_buf_ops;
ablock = XFS_BUF_TO_BLOCK(abp);
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block_int(mp, ablock, abp->b_bn,
- XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
- XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block_int(mp, ablock, abp->b_bn,
- XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+ xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+ XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS);
arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
@@ -934,7 +910,6 @@ xfs_bmap_local_to_extents(
* file currently fits in an inode.
*/
if (*firstblock == NULLFSBLOCK) {
-try_another_ag:
args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
args.type = XFS_ALLOCTYPE_START_BNO;
} else {
@@ -947,19 +922,6 @@ try_another_ag:
if (error)
goto done;
- /*
- * During a CoW operation, the allocation and bmbt updates occur in
- * different transactions. The mapping code tries to put new bmbt
- * blocks near extents being mapped, but the only way to guarantee this
- * is if the alloc and the mapping happen in a single transaction that
- * has a block reservation. That isn't the case here, so if we run out
- * of space we'll try again with another AG.
- */
- if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) &&
- args.fsbno == NULLFSBLOCK &&
- args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- goto try_another_ag;
- }
/* Can't fail, the space was reserved. */
ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(args.len == 1);
@@ -1269,7 +1231,6 @@ xfs_bmap_read_extents(
xfs_fsblock_t bno; /* block # of "block" */
xfs_buf_t *bp; /* buffer for "block" */
int error; /* error return value */
- xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */
xfs_extnum_t i, j; /* index into the extents list */
xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
@@ -1278,11 +1239,8 @@ xfs_bmap_read_extents(
/* REFERENCED */
xfs_extnum_t room; /* number of entries there's room for */
- bno = NULLFSBLOCK;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
- exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
- XFS_EXTFMT_INODE(ip);
block = ifp->if_broot;
/*
* Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
@@ -1291,9 +1249,7 @@ xfs_bmap_read_extents(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
/*
* Go down the tree until leaf level is reached, following the first
* pointer (leftmost) at each level.
@@ -1324,7 +1280,6 @@ xfs_bmap_read_extents(
xfs_bmbt_rec_t *frp;
xfs_fsblock_t nextbno;
xfs_extnum_t num_recs;
- xfs_extnum_t start;
num_recs = xfs_btree_get_numrecs(block);
if (unlikely(i + num_recs > room)) {
@@ -1347,23 +1302,13 @@ xfs_bmap_read_extents(
* Copy records into the extent records.
*/
frp = XFS_BMBT_REC_ADDR(mp, block, 1);
- start = i;
for (j = 0; j < num_recs; j++, i++, frp++) {
xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
trp->l0 = be64_to_cpu(frp->l0);
trp->l1 = be64_to_cpu(frp->l1);
- }
- if (exntf == XFS_EXTFMT_NOSTATE) {
- /*
- * Check all attribute bmap btree records and
- * any "older" data bmap btree records for a
- * set bit in the "extent flag" position.
- */
- if (unlikely(xfs_check_nostate_extents(ifp,
- start, num_recs))) {
+ if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) {
XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
- XFS_ERRLEVEL_LOW,
- ip->i_mount);
+ XFS_ERRLEVEL_LOW, mp);
goto error0;
}
}
@@ -1864,6 +1809,7 @@ xfs_bmap_add_extent_delay_real(
*/
trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_startblock(ep, new->br_startblock);
+ xfs_bmbt_set_state(ep, new->br_state);
trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
(*nextents)++;
@@ -2117,8 +2063,10 @@ xfs_bmap_add_extent_delay_real(
}
temp = xfs_bmap_worst_indlen(bma->ip, temp);
temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
- diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
- (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+ diff = (int)(temp + temp2 -
+ (startblockval(PREV.br_startblock) -
+ (bma->cur ?
+ bma->cur->bc_private.b.allocated : 0)));
if (diff > 0) {
error = xfs_mod_fdblocks(bma->ip->i_mount,
-((int64_t)diff), false);
@@ -2175,7 +2123,6 @@ xfs_bmap_add_extent_delay_real(
temp = da_new;
if (bma->cur)
temp += bma->cur->bc_private.b.allocated;
- ASSERT(temp <= da_old);
if (temp < da_old)
xfs_mod_fdblocks(bma->ip->i_mount,
(int64_t)(da_old - temp), false);
@@ -2202,6 +2149,7 @@ STATIC int /* error */
xfs_bmap_add_extent_unwritten_real(
struct xfs_trans *tp,
xfs_inode_t *ip, /* incore inode pointer */
+ int whichfork,
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
@@ -2221,12 +2169,14 @@ xfs_bmap_add_extent_unwritten_real(
/* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */
int state = 0;/* state bits, accessed thru macros */
- struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_mount *mp = ip->i_mount;
*logflagsp = 0;
cur = *curp;
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
ASSERT(*idx >= 0);
ASSERT(*idx <= xfs_iext_count(ifp));
@@ -2285,7 +2235,7 @@ xfs_bmap_add_extent_unwritten_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+ if (*idx < xfs_iext_count(ifp) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
if (isnullstartblock(RIGHT.br_startblock))
@@ -2325,7 +2275,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 2, state);
- ip->i_d.di_nextents -= 2;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2368,7 +2319,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 1, state);
- ip->i_d.di_nextents--;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2403,7 +2355,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_bmbt_set_state(ep, newext);
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 1, state);
- ip->i_d.di_nextents--;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2515,7 +2468,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_insert(ip, *idx, 1, new, state);
- ip->i_d.di_nextents++;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2593,7 +2547,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx;
xfs_iext_insert(ip, *idx, 1, new, state);
- ip->i_d.di_nextents++;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2641,7 +2596,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx;
xfs_iext_insert(ip, *idx, 2, &r[0], state);
- ip->i_d.di_nextents += 2;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2695,17 +2651,17 @@ xfs_bmap_add_extent_unwritten_real(
}
/* update reverse mappings */
- error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+ error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
if (error)
goto done;
/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
- 0, &tmp_logflags, XFS_DATA_FORK);
+ 0, &tmp_logflags, whichfork);
*logflagsp |= tmp_logflags;
if (error)
goto done;
@@ -2717,7 +2673,7 @@ xfs_bmap_add_extent_unwritten_real(
*curp = cur;
}
- xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
+ xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
done:
*logflagsp |= rval;
return error;
@@ -2809,7 +2765,8 @@ xfs_bmap_add_extent_hole_delay(
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock) +
startblockval(right.br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
nullstartblock((int)newlen));
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2830,7 +2787,8 @@ xfs_bmap_add_extent_hole_delay(
xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
nullstartblock((int)newlen));
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2846,7 +2804,8 @@ xfs_bmap_add_extent_hole_delay(
temp = new->br_blockcount + right.br_blockcount;
oldlen = startblockval(new->br_startblock) +
startblockval(right.br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
new->br_startoff,
nullstartblock((int)newlen), temp, right.br_state);
@@ -2878,41 +2837,45 @@ xfs_bmap_add_extent_hole_delay(
*/
STATIC int /* error */
xfs_bmap_add_extent_hole_real(
- struct xfs_bmalloca *bma,
- int whichfork)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_extnum_t *idx,
+ struct xfs_btree_cur **curp,
+ struct xfs_bmbt_irec *new,
+ xfs_fsblock_t *first,
+ struct xfs_defer_ops *dfops,
+ int *logflagsp)
{
- struct xfs_bmbt_irec *new = &bma->got;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_btree_cur *cur = *curp;
int error; /* error return value */
int i; /* temp state */
- xfs_ifork_t *ifp; /* inode fork pointer */
xfs_bmbt_irec_t left; /* left neighbor extent entry */
xfs_bmbt_irec_t right; /* right neighbor extent entry */
int rval=0; /* return value (logging flags) */
int state; /* state bits, accessed thru macros */
- struct xfs_mount *mp;
- mp = bma->ip->i_mount;
- ifp = XFS_IFORK_PTR(bma->ip, whichfork);
-
- ASSERT(bma->idx >= 0);
- ASSERT(bma->idx <= xfs_iext_count(ifp));
+ ASSERT(*idx >= 0);
+ ASSERT(*idx <= xfs_iext_count(ifp));
ASSERT(!isnullstartblock(new->br_startblock));
- ASSERT(!bma->cur ||
- !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
- ASSERT(whichfork != XFS_COW_FORK);
+ ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
XFS_STATS_INC(mp, xs_add_exlist);
state = 0;
if (whichfork == XFS_ATTR_FORK)
state |= BMAP_ATTRFORK;
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
/*
* Check and set flags if this segment has a left neighbor.
*/
- if (bma->idx > 0) {
+ if (*idx > 0) {
state |= BMAP_LEFT_VALID;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
if (isnullstartblock(left.br_startblock))
state |= BMAP_LEFT_DELAY;
}
@@ -2921,9 +2884,9 @@ xfs_bmap_add_extent_hole_real(
* Check and set flags if this segment has a current value.
* Not true if we're inserting into the "hole" at eof.
*/
- if (bma->idx < xfs_iext_count(ifp)) {
+ if (*idx < xfs_iext_count(ifp)) {
state |= BMAP_RIGHT_VALID;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
if (isnullstartblock(right.br_startblock))
state |= BMAP_RIGHT_DELAY;
}
@@ -2960,36 +2923,36 @@ xfs_bmap_add_extent_hole_real(
* left and on the right.
* Merge all three into a single extent record.
*/
- --bma->idx;
- trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
+ --*idx;
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
left.br_blockcount + new->br_blockcount +
right.br_blockcount);
- trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
- xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+ xfs_iext_remove(ip, *idx + 1, 1, state);
- XFS_IFORK_NEXT_SET(bma->ip, whichfork,
- XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
- if (bma->cur == NULL) {
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ if (cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
rval = XFS_ILOG_CORE;
- error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
+ error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
right.br_startblock, right.br_blockcount,
&i);
if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- error = xfs_btree_delete(bma->cur, &i);
+ error = xfs_btree_delete(cur, &i);
if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- error = xfs_btree_decrement(bma->cur, 0, &i);
+ error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- error = xfs_bmbt_update(bma->cur, left.br_startoff,
+ error = xfs_bmbt_update(cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
new->br_blockcount +
@@ -3006,23 +2969,23 @@ xfs_bmap_add_extent_hole_real(
* on the left.
* Merge the new allocation with the left neighbor.
*/
- --bma->idx;
- trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
+ --*idx;
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
left.br_blockcount + new->br_blockcount);
- trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
- if (bma->cur == NULL) {
+ if (cur == NULL) {
rval = xfs_ilog_fext(whichfork);
} else {
rval = 0;
- error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
+ error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
left.br_startblock, left.br_blockcount,
&i);
if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- error = xfs_bmbt_update(bma->cur, left.br_startoff,
+ error = xfs_bmbt_update(cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
new->br_blockcount,
@@ -3038,25 +3001,25 @@ xfs_bmap_add_extent_hole_real(
* on the right.
* Merge the new allocation with the right neighbor.
*/
- trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
- xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
new->br_startoff, new->br_startblock,
new->br_blockcount + right.br_blockcount,
right.br_state);
- trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
- if (bma->cur == NULL) {
+ if (cur == NULL) {
rval = xfs_ilog_fext(whichfork);
} else {
rval = 0;
- error = xfs_bmbt_lookup_eq(bma->cur,
+ error = xfs_bmbt_lookup_eq(cur,
right.br_startoff,
right.br_startblock,
right.br_blockcount, &i);
if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- error = xfs_bmbt_update(bma->cur, new->br_startoff,
+ error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
new->br_blockcount +
right.br_blockcount,
@@ -3072,22 +3035,22 @@ xfs_bmap_add_extent_hole_real(
* real allocation.
* Insert a new entry.
*/
- xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
- XFS_IFORK_NEXT_SET(bma->ip, whichfork,
- XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
- if (bma->cur == NULL) {
+ xfs_iext_insert(ip, *idx, 1, new, state);
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+ if (cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
rval = XFS_ILOG_CORE;
- error = xfs_bmbt_lookup_eq(bma->cur,
+ error = xfs_bmbt_lookup_eq(cur,
new->br_startoff,
new->br_startblock,
new->br_blockcount, &i);
if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
- bma->cur->bc_rec.b.br_state = new->br_state;
- error = xfs_btree_insert(bma->cur, &i);
+ cur->bc_rec.b.br_state = new->br_state;
+ error = xfs_btree_insert(cur, &i);
if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -3096,30 +3059,30 @@ xfs_bmap_add_extent_hole_real(
}
/* add reverse mapping */
- error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
+ error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new);
if (error)
goto done;
/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
+ if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
- ASSERT(bma->cur == NULL);
- error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
- bma->firstblock, bma->dfops, &bma->cur,
+ ASSERT(cur == NULL);
+ error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp,
0, &tmp_logflags, whichfork);
- bma->logflags |= tmp_logflags;
+ *logflagsp |= tmp_logflags;
+ cur = *curp;
if (error)
goto done;
}
/* clear out the allocated field, done with it now in any case. */
- if (bma->cur)
- bma->cur->bc_private.b.allocated = 0;
+ if (cur)
+ cur->bc_private.b.allocated = 0;
- xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
+ xfs_bmap_check_leaf_extents(cur, ip, whichfork);
done:
- bma->logflags |= rval;
+ *logflagsp |= rval;
return error;
}
@@ -3822,17 +3785,13 @@ xfs_bmap_btalloc(
* the first block that was allocated.
*/
ASSERT(*ap->firstblock == NULLFSBLOCK ||
- XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
- XFS_FSB_TO_AGNO(mp, args.fsbno) ||
- (ap->dfops->dop_low &&
- XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
- XFS_FSB_TO_AGNO(mp, args.fsbno)));
+ XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
+ XFS_FSB_TO_AGNO(mp, args.fsbno));
ap->blkno = args.fsbno;
if (*ap->firstblock == NULLFSBLOCK)
*ap->firstblock = args.fsbno;
- ASSERT(nullfb || fb_agno == args.agno ||
- (ap->dfops->dop_low && fb_agno < args.agno));
+ ASSERT(nullfb || fb_agno <= args.agno);
ap->length = args.len;
if (!(ap->flags & XFS_BMAPI_COWFORK))
ap->ip->i_d.di_nblocks += args.len;
@@ -3855,60 +3814,6 @@ xfs_bmap_btalloc(
}
/*
- * For a remap operation, just "allocate" an extent at the address that the
- * caller passed in, and ensure that the AGFL is the right size. The caller
- * will then map the "allocated" extent into the file somewhere.
- */
-STATIC int
-xfs_bmap_remap_alloc(
- struct xfs_bmalloca *ap)
-{
- struct xfs_trans *tp = ap->tp;
- struct xfs_mount *mp = tp->t_mountp;
- xfs_agblock_t bno;
- struct xfs_alloc_arg args;
- int error;
-
- /*
- * validate that the block number is legal - the enables us to detect
- * and handle a silent filesystem corruption rather than crashing.
- */
- memset(&args, 0, sizeof(struct xfs_alloc_arg));
- args.tp = ap->tp;
- args.mp = ap->tp->t_mountp;
- bno = *ap->firstblock;
- args.agno = XFS_FSB_TO_AGNO(mp, bno);
- args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
- if (args.agno >= mp->m_sb.sb_agcount ||
- args.agbno >= mp->m_sb.sb_agblocks)
- return -EFSCORRUPTED;
-
- /* "Allocate" the extent from the range we passed in. */
- trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
- ap->blkno = bno;
- ap->ip->i_d.di_nblocks += ap->length;
- xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
-
- /* Fix the freelist, like a real allocator does. */
- args.datatype = ap->datatype;
- args.pag = xfs_perag_get(args.mp, args.agno);
- ASSERT(args.pag);
-
- /*
- * The freelist fixing code will decline the allocation if
- * the size and shape of the free space doesn't allow for
- * allocating the extent and updating all the metadata that
- * happens during an allocation. We're remapping, not
- * allocating, so skip that check by pretending to be freeing.
- */
- error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
- xfs_perag_put(args.pag);
- if (error)
- trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
- return error;
-}
-
-/*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
* It figures out where to ask the underlying allocator to put the new extent.
*/
@@ -3916,8 +3821,6 @@ STATIC int
xfs_bmap_alloc(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
{
- if (ap->flags & XFS_BMAPI_REMAP)
- return xfs_bmap_remap_alloc(ap);
if (XFS_IS_REALTIME_INODE(ap->ip) &&
xfs_alloc_is_userdata(ap->datatype))
return xfs_bmap_rtalloc(ap);
@@ -4089,7 +3992,7 @@ xfs_bmapi_read(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
@@ -4156,6 +4059,19 @@ xfs_bmapi_read(
return 0;
}
+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
@@ -4242,13 +4158,8 @@ xfs_bmapi_reserve_delalloc(
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
- xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
- /*
- * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
- * might have merged it into one of the neighbouring ones.
- */
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
/*
* Tag the inode if blocks were preallocated. Note that COW fork
@@ -4260,10 +4171,6 @@ xfs_bmapi_reserve_delalloc(
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);
- ASSERT(got->br_startoff <= aoff);
- ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
- ASSERT(isnullstartblock(got->br_startblock));
- ASSERT(got->br_state == XFS_EXT_NORM);
return 0;
out_unreserve_blocks:
@@ -4368,17 +4275,25 @@ xfs_bmapi_allocate(
bma->got.br_state = XFS_EXT_NORM;
/*
- * A wasdelay extent has been initialized, so shouldn't be flagged
- * as unwritten.
+ * In the data fork, a wasdelay extent has been initialized, so
+ * shouldn't be flagged as unwritten.
+ *
+ * For the cow fork, however, we convert delalloc reservations
+ * (extents allocated for speculative preallocation) to
+ * allocated unwritten extents, and only convert the unwritten
+ * extents to real extents when we're about to write the data.
*/
- if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+ if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
+ (bma->flags & XFS_BMAPI_PREALLOC) &&
xfs_sb_version_hasextflgbit(&mp->m_sb))
bma->got.br_state = XFS_EXT_UNWRITTEN;
if (bma->wasdel)
error = xfs_bmap_add_extent_delay_real(bma, whichfork);
else
- error = xfs_bmap_add_extent_hole_real(bma, whichfork);
+ error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
+ whichfork, &bma->idx, &bma->cur, &bma->got,
+ bma->firstblock, bma->dfops, &bma->logflags);
bma->logflags |= tmp_logflags;
if (error)
@@ -4422,8 +4337,6 @@ xfs_bmapi_convert_unwritten(
(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
return 0;
- ASSERT(whichfork != XFS_COW_FORK);
-
/*
* Modify (by adding) the state flag, if writing.
*/
@@ -4448,8 +4361,8 @@ xfs_bmapi_convert_unwritten(
return error;
}
- error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
- &bma->cur, mval, bma->firstblock, bma->dfops,
+ error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
+ &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
&tmp_logflags);
/*
* Log the inode core unconditionally in the unwritten extent conversion
@@ -4458,8 +4371,12 @@ xfs_bmapi_convert_unwritten(
* in the transaction for the sake of fsync(), even if nothing has
* changed, because fsync() will not force the log for this transaction
* unless it sees the inode pinned.
+ *
+ * Note: If we're only converting cow fork extents, there aren't
+ * any on-disk updates to make, so we don't need to log anything.
*/
- bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
+ if (whichfork != XFS_COW_FORK)
+ bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error)
return error;
@@ -4533,15 +4450,13 @@ xfs_bmapi_write(
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & XFS_BMAPI_IGSTATE));
- ASSERT(tp != NULL);
+ ASSERT(tp != NULL ||
+ (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
+ (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
- ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
- ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
- ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
- ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
+ ASSERT(!(flags & XFS_BMAPI_REMAP));
/* zeroing is for currently only for data extents, not metadata */
ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4558,7 +4473,7 @@ xfs_bmapi_write(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
@@ -4625,13 +4540,8 @@ xfs_bmapi_write(
} else {
need_alloc = true;
}
- } else {
- /*
- * Make sure we only reflink into a hole.
- */
- ASSERT(!(flags & XFS_BMAPI_REMAP));
- if (isnullstartblock(bma.got.br_startblock))
- wasdelay = true;
+ } else if (isnullstartblock(bma.got.br_startblock)) {
+ wasdelay = true;
}
/*
@@ -4746,13 +4656,9 @@ error0:
if (bma.cur) {
if (!error) {
ASSERT(*firstblock == NULLFSBLOCK ||
- XFS_FSB_TO_AGNO(mp, *firstblock) ==
+ XFS_FSB_TO_AGNO(mp, *firstblock) <=
XFS_FSB_TO_AGNO(mp,
- bma.cur->bc_private.b.firstblock) ||
- (dfops->dop_low &&
- XFS_FSB_TO_AGNO(mp, *firstblock) <
- XFS_FSB_TO_AGNO(mp,
- bma.cur->bc_private.b.firstblock)));
+ bma.cur->bc_private.b.firstblock));
*firstblock = bma.cur->bc_private.b.firstblock;
}
xfs_btree_del_cursor(bma.cur,
@@ -4764,6 +4670,93 @@ error0:
return error;
}
+static int
+xfs_bmapi_remap(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ xfs_fileoff_t bno,
+ xfs_filblks_t len,
+ xfs_fsblock_t startblock,
+ struct xfs_defer_ops *dfops)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ struct xfs_btree_cur *cur = NULL;
+ xfs_fsblock_t firstblock = NULLFSBLOCK;
+ struct xfs_bmbt_irec got;
+ xfs_extnum_t idx;
+ int logflags = 0, error;
+
+ ASSERT(len > 0);
+ ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ if (unlikely(XFS_TEST_ERROR(
+ (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
+ XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
+
+ if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
+ /* make sure we only reflink into a hole. */
+ ASSERT(got.br_startoff > bno);
+ ASSERT(got.br_startoff - bno >= len);
+ }
+
+ ip->i_d.di_nblocks += len;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ if (ifp->if_flags & XFS_IFBROOT) {
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
+ cur->bc_private.b.firstblock = firstblock;
+ cur->bc_private.b.dfops = dfops;
+ cur->bc_private.b.flags = 0;
+ }
+
+ got.br_startoff = bno;
+ got.br_startblock = startblock;
+ got.br_blockcount = len;
+ got.br_state = XFS_EXT_NORM;
+
+ error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur,
+ &got, &firstblock, dfops, &logflags);
+ if (error)
+ goto error0;
+
+ if (xfs_bmap_wants_extents(ip, XFS_DATA_FORK)) {
+ int tmp_logflags = 0;
+
+ error = xfs_bmap_btree_to_extents(tp, ip, cur,
+ &tmp_logflags, XFS_DATA_FORK);
+ logflags |= tmp_logflags;
+ }
+
+error0:
+ if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
+ logflags &= ~XFS_ILOG_DEXT;
+ else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
+ logflags &= ~XFS_ILOG_DBROOT;
+
+ if (logflags)
+ xfs_trans_log_inode(tp, ip, logflags);
+ if (cur) {
+ xfs_btree_del_cursor(cur,
+ error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ }
+ return error;
+}
+
/*
* When a delalloc extent is split (e.g., due to a hole punch), the original
* indlen reservation must be shared across the two new extents that are left
@@ -4787,34 +4780,59 @@ xfs_bmap_split_indlen(
xfs_filblks_t len2 = *indlen2;
xfs_filblks_t nres = len1 + len2; /* new total res. */
xfs_filblks_t stolen = 0;
+ xfs_filblks_t resfactor;
/*
* Steal as many blocks as we can to try and satisfy the worst case
* indlen for both new extents.
*/
- while (nres > ores && avail) {
- nres--;
- avail--;
- stolen++;
- }
+ if (ores < nres && avail)
+ stolen = XFS_FILBLKS_MIN(nres - ores, avail);
+ ores += stolen;
+
+ /* nothing else to do if we've satisfied the new reservation */
+ if (ores >= nres)
+ return stolen;
/*
- * The only blocks available are those reserved for the original
- * extent and what we can steal from the extent being removed.
- * If this still isn't enough to satisfy the combined
- * requirements for the two new extents, skim blocks off of each
- * of the new reservations until they match what is available.
+ * We can't meet the total required reservation for the two extents.
+ * Calculate the percent of the overall shortage between both extents
+ * and apply this percentage to each of the requested indlen values.
+ * This distributes the shortage fairly and reduces the chances that one
+ * of the two extents is left with nothing when extents are repeatedly
+ * split.
*/
- while (nres > ores) {
- if (len1) {
- len1--;
- nres--;
+ resfactor = (ores * 100);
+ do_div(resfactor, nres);
+ len1 *= resfactor;
+ do_div(len1, 100);
+ len2 *= resfactor;
+ do_div(len2, 100);
+ ASSERT(len1 + len2 <= ores);
+ ASSERT(len1 < *indlen1 && len2 < *indlen2);
+
+ /*
+ * Hand out the remainder to each extent. If one of the two reservations
+ * is zero, we want to make sure that one gets a block first. The loop
+ * below starts with len1, so hand len2 a block right off the bat if it
+ * is zero.
+ */
+ ores -= (len1 + len2);
+ ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
+ if (ores && !len2 && *indlen2) {
+ len2++;
+ ores--;
+ }
+ while (ores) {
+ if (len1 < *indlen1) {
+ len1++;
+ ores--;
}
- if (nres == ores)
+ if (!ores)
break;
- if (len2) {
- len2--;
- nres--;
+ if (len2 < *indlen2) {
+ len2++;
+ ores--;
}
}
@@ -4856,7 +4874,7 @@ xfs_bmap_del_extent_delay(
ASSERT(got_endoff >= del_endoff);
if (isrt) {
- int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
+ uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_frextents(mp, rtexts);
@@ -5416,6 +5434,7 @@ __xfs_bunmapi(
int whichfork; /* data or attribute fork */
xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */
+ xfs_fileoff_t max_len;
trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
@@ -5437,6 +5456,16 @@ __xfs_bunmapi(
ASSERT(len > 0);
ASSERT(nexts >= 0);
+ /*
+ * Guesstimate how many blocks we can unmap without running the risk of
+ * blowing out the transaction with a mix of EFIs and reflink
+ * adjustments.
+ */
+ if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+ max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
+ else
+ max_len = len;
+
if (!(ifp->if_flags & XFS_IFEXTENTS) &&
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
@@ -5481,7 +5510,7 @@ __xfs_bunmapi(
extno = 0;
while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
- (nexts == 0 || extno < nexts)) {
+ (nexts == 0 || extno < nexts) && max_len > 0) {
/*
* Is the found extent after a hole in which bno lives?
* Just back up to the previous extent, if so.
@@ -5513,6 +5542,15 @@ __xfs_bunmapi(
}
if (del.br_startoff + del.br_blockcount > bno + 1)
del.br_blockcount = bno + 1 - del.br_startoff;
+
+ /* How much can we safely unmap? */
+ if (max_len < del.br_blockcount) {
+ del.br_startoff += del.br_blockcount - max_len;
+ if (!wasdel)
+ del.br_startblock += del.br_blockcount - max_len;
+ del.br_blockcount = max_len;
+ }
+
sum = del.br_startblock + del.br_blockcount;
if (isrt &&
(mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
@@ -5556,8 +5594,8 @@ __xfs_bunmapi(
}
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp, ip,
- &lastx, &cur, &del, firstblock, dfops,
- &logflags);
+ whichfork, &lastx, &cur, &del,
+ firstblock, dfops, &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5610,8 +5648,9 @@ __xfs_bunmapi(
prev.br_state = XFS_EXT_UNWRITTEN;
lastx--;
error = xfs_bmap_add_extent_unwritten_real(tp,
- ip, &lastx, &cur, &prev,
- firstblock, dfops, &logflags);
+ ip, whichfork, &lastx, &cur,
+ &prev, firstblock, dfops,
+ &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5619,8 +5658,9 @@ __xfs_bunmapi(
ASSERT(del.br_state == XFS_EXT_NORM);
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp,
- ip, &lastx, &cur, &del,
- firstblock, dfops, &logflags);
+ ip, whichfork, &lastx, &cur,
+ &del, firstblock, dfops,
+ &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5687,6 +5727,7 @@ __xfs_bunmapi(
if (!isrt && wasdel)
xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
+ max_len -= del.br_blockcount;
bno = del.br_startoff - 1;
nodelete:
/*
@@ -6057,7 +6098,7 @@ xfs_bmap_shift_extents(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmap_shift_extents",
XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
@@ -6209,7 +6250,7 @@ xfs_bmap_split_extent_at(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
@@ -6452,49 +6493,33 @@ xfs_bmap_finish_one(
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
- xfs_filblks_t blockcount,
+ xfs_filblks_t *blockcount,
xfs_exntst_t state)
{
- struct xfs_bmbt_irec bmap;
- int nimaps = 1;
xfs_fsblock_t firstfsb;
- int flags = XFS_BMAPI_REMAP;
- int done;
int error = 0;
- bmap.br_startblock = startblock;
- bmap.br_startoff = startoff;
- bmap.br_blockcount = blockcount;
- bmap.br_state = state;
-
trace_xfs_bmap_deferred(tp->t_mountp,
XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
- ip->i_ino, whichfork, startoff, blockcount, state);
+ ip->i_ino, whichfork, startoff, *blockcount, state);
- if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK)
+ if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
return -EFSCORRUPTED;
- if (whichfork == XFS_ATTR_FORK)
- flags |= XFS_BMAPI_ATTRFORK;
if (XFS_TEST_ERROR(false, tp->t_mountp,
- XFS_ERRTAG_BMAP_FINISH_ONE,
- XFS_RANDOM_BMAP_FINISH_ONE))
+ XFS_ERRTAG_BMAP_FINISH_ONE))
return -EIO;
switch (type) {
case XFS_BMAP_MAP:
- firstfsb = bmap.br_startblock;
- error = xfs_bmapi_write(tp, ip, bmap.br_startoff,
- bmap.br_blockcount, flags, &firstfsb,
- bmap.br_blockcount, &bmap, &nimaps,
- dfops);
+ error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
+ startblock, dfops);
+ *blockcount = 0;
break;
case XFS_BMAP_UNMAP:
- error = xfs_bunmapi(tp, ip, bmap.br_startoff,
- bmap.br_blockcount, flags, 1, &firstfsb,
- dfops, &done);
- ASSERT(done);
+ error = __xfs_bunmapi(tp, ip, startoff, blockcount,
+ XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
break;
default:
ASSERT(0);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index cdef87db5262..851982a5dfbc 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -172,6 +172,18 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags)
/*
+ * Return true if the extent is a real, allocated extent, or false if it is a
+ * delayed allocation, and unwritten extent or a hole.
+ */
+static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
+{
+ return irec->br_state != XFS_EXT_UNWRITTEN &&
+ irec->br_startblock != HOLESTARTBLOCK &&
+ irec->br_startblock != DELAYSTARTBLOCK &&
+ !isnullstartblock(irec->br_startblock);
+}
+
+/*
* This macro is used to determine how many extents will be shifted
* in one write transaction. We could require two splits,
* an extent move on the first and an extent merge on the second,
@@ -232,8 +244,6 @@ int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *del);
void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
-int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
- xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
@@ -261,7 +271,7 @@ struct xfs_bmap_intent {
int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops,
struct xfs_inode *ip, enum xfs_bmap_intent_type type,
int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
- xfs_filblks_t blockcount, xfs_exntst_t state);
+ xfs_filblks_t *blockcount, xfs_exntst_t state);
int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index d9be241fc86f..85de22513014 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -71,15 +71,9 @@ xfs_bmdr_to_bmbt(
xfs_bmbt_key_t *tkp;
__be64 *tpp;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
- XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
- XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
- XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+ xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
+ XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS);
-
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
@@ -100,8 +94,8 @@ xfs_bmdr_to_bmbt(
*/
STATIC void
__xfs_bmbt_get_all(
- __uint64_t l0,
- __uint64_t l1,
+ uint64_t l0,
+ uint64_t l1,
xfs_bmbt_irec_t *s)
{
int ext_flag;
@@ -372,32 +366,6 @@ xfs_bmbt_to_bmdr(
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
-/*
- * Check extent records, which have just been read, for
- * any bit in the extent flag field. ASSERT on debug
- * kernels, as this condition should not occur.
- * Return an error condition (1) if any flags found,
- * otherwise return 0.
- */
-
-int
-xfs_check_nostate_extents(
- xfs_ifork_t *ifp,
- xfs_extnum_t idx,
- xfs_extnum_t num)
-{
- for (; num > 0; num--, idx++) {
- xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
- if ((ep->l0 >>
- (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
- ASSERT(0);
- return 1;
- }
- }
- return 0;
-}
-
-
STATIC struct xfs_btree_cur *
xfs_bmbt_dup_cursor(
struct xfs_btree_cur *cur)
@@ -453,7 +421,6 @@ xfs_bmbt_alloc_block(
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
/*
* Make sure there is sufficient room left in the AG to
@@ -483,22 +450,6 @@ try_another_ag:
if (error)
goto error0;
- /*
- * During a CoW operation, the allocation and bmbt updates occur in
- * different transactions. The mapping code tries to put new bmbt
- * blocks near extents being mapped, but the only way to guarantee this
- * is if the alloc and the mapping happen in a single transaction that
- * has a block reservation. That isn't the case here, so if we run out
- * of space we'll try again with another AG.
- */
- if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
- args.fsbno == NULLFSBLOCK &&
- args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- cur->bc_private.b.dfops->dop_low = true;
- args.fsbno = cur->bc_private.b.firstblock;
- goto try_another_ag;
- }
-
if (args.fsbno == NULLFSBLOCK && args.minleft) {
/*
* Could not find an AG with enough free space to satisfy
@@ -512,7 +463,7 @@ try_another_ag:
goto error0;
cur->bc_private.b.dfops->dop_low = true;
}
- if (args.fsbno == NULLFSBLOCK) {
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
@@ -622,6 +573,16 @@ xfs_bmbt_init_key_from_rec(
}
STATIC void
+xfs_bmbt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->bmbt.br_startoff = cpu_to_be64(
+ xfs_bmbt_disk_get_startoff(&rec->bmbt) +
+ xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1);
+}
+
+STATIC void
xfs_bmbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
@@ -637,15 +598,25 @@ xfs_bmbt_init_ptr_from_cur(
ptr->l = 0;
}
-STATIC __int64_t
+STATIC int64_t
xfs_bmbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
- return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
+ return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
cur->bc_rec.b.br_startoff;
}
+STATIC int64_t
+xfs_bmbt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
+ be64_to_cpu(k2->bmbt.br_startoff);
+}
+
static bool
xfs_bmbt_verify(
struct xfs_buf *bp)
@@ -736,7 +707,6 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_bmbt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -757,7 +727,6 @@ xfs_bmbt_recs_inorder(
xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
xfs_bmbt_disk_get_startoff(&r2->bmbt);
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_bmbt_ops = {
.rec_len = sizeof(xfs_bmbt_rec_t),
@@ -771,14 +740,14 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.get_minrecs = xfs_bmbt_get_minrecs,
.get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
.init_key_from_rec = xfs_bmbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
+ .diff_two_keys = xfs_bmbt_diff_two_keys,
.buf_ops = &xfs_bmbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 819a8a4dee95..9da5a8d4f184 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -25,14 +25,6 @@ struct xfs_inode;
struct xfs_trans;
/*
- * Extent state and extent format macros.
- */
-#define XFS_EXTFMT_INODE(x) \
- (xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \
- XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
-#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN)
-
-/*
* Btree block header size depends on a superblock flag.
*/
#define XFS_BMBT_BLOCK_LEN(mp) \
@@ -140,4 +132,18 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
+/*
+ * Check that the extent does not contain an invalid unwritten extent flag.
+ */
+static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
+ struct xfs_bmbt_rec_host *ep)
+{
+ if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0)
+ return true;
+ if (whichfork == XFS_DATA_FORK &&
+ xfs_sb_version_hasextflgbit(&mp->m_sb))
+ return true;
+ return false;
+}
+
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 21e6a6ab6b9a..4da85fff69ad 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -43,15 +43,25 @@ kmem_zone_t *xfs_btree_cur_zone;
/*
* Btree magic numbers.
*/
-static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
XFS_FIBT_MAGIC, 0 },
{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
XFS_REFC_CRC_MAGIC }
};
-#define xfs_btree_magic(cur) \
- xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
+
+uint32_t
+xfs_btree_magic(
+ int crc,
+ xfs_btnum_t btnum)
+{
+ uint32_t magic = xfs_magics[crc][btnum];
+
+ /* Ensure we asked for crc for crc-only magics. */
+ ASSERT(magic != 0);
+ return magic;
+}
STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(
@@ -62,10 +72,13 @@ xfs_btree_check_lblock(
{
int lblock_ok = 1; /* block passes checks */
struct xfs_mount *mp; /* file system mount point */
+ xfs_btnum_t btnum = cur->bc_btnum;
+ int crc;
mp = cur->bc_mp;
+ crc = xfs_sb_version_hascrc(&mp->m_sb);
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (crc) {
lblock_ok = lblock_ok &&
uuid_equal(&block->bb_u.l.bb_uuid,
&mp->m_sb.sb_meta_uuid) &&
@@ -74,7 +87,7 @@ xfs_btree_check_lblock(
}
lblock_ok = lblock_ok &&
- be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
@@ -88,8 +101,7 @@ xfs_btree_check_lblock(
be64_to_cpu(block->bb_u.l.bb_rightsib)));
if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
- XFS_ERRTAG_BTREE_CHECK_LBLOCK,
- XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
+ XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
@@ -110,13 +122,16 @@ xfs_btree_check_sblock(
struct xfs_agf *agf; /* ag. freespace structure */
xfs_agblock_t agflen; /* native ag. freespace length */
int sblock_ok = 1; /* block passes checks */
+ xfs_btnum_t btnum = cur->bc_btnum;
+ int crc;
mp = cur->bc_mp;
+ crc = xfs_sb_version_hascrc(&mp->m_sb);
agbp = cur->bc_private.a.agbp;
agf = XFS_BUF_TO_AGF(agbp);
agflen = be32_to_cpu(agf->agf_length);
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (crc) {
sblock_ok = sblock_ok &&
uuid_equal(&block->bb_u.s.bb_uuid,
&mp->m_sb.sb_meta_uuid) &&
@@ -125,7 +140,7 @@ xfs_btree_check_sblock(
}
sblock_ok = sblock_ok &&
- be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
@@ -137,8 +152,7 @@ xfs_btree_check_sblock(
block->bb_u.s.bb_rightsib;
if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
- XFS_ERRTAG_BTREE_CHECK_SBLOCK,
- XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
+ XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
@@ -552,7 +566,7 @@ xfs_btree_ptr_offset(
/*
* Return a pointer to the n-th record in the btree block.
*/
-STATIC union xfs_btree_rec *
+union xfs_btree_rec *
xfs_btree_rec_addr(
struct xfs_btree_cur *cur,
int n,
@@ -565,7 +579,7 @@ xfs_btree_rec_addr(
/*
* Return a pointer to the n-th key in the btree block.
*/
-STATIC union xfs_btree_key *
+union xfs_btree_key *
xfs_btree_key_addr(
struct xfs_btree_cur *cur,
int n,
@@ -578,7 +592,7 @@ xfs_btree_key_addr(
/*
* Return a pointer to the n-th high key in the btree block.
*/
-STATIC union xfs_btree_key *
+union xfs_btree_key *
xfs_btree_high_key_addr(
struct xfs_btree_cur *cur,
int n,
@@ -591,7 +605,7 @@ xfs_btree_high_key_addr(
/*
* Return a pointer to the n-th block pointer in the btree block.
*/
-STATIC union xfs_btree_ptr *
+union xfs_btree_ptr *
xfs_btree_ptr_addr(
struct xfs_btree_cur *cur,
int n,
@@ -625,7 +639,7 @@ xfs_btree_get_iroot(
* Retrieve the block pointer from the cursor at the given level.
* This may be an inode btree root or from a buffer.
*/
-STATIC struct xfs_btree_block * /* generic btree block pointer */
+struct xfs_btree_block * /* generic btree block pointer */
xfs_btree_get_block(
struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in btree */
@@ -762,14 +776,14 @@ xfs_btree_lastrec(
*/
void
xfs_btree_offsets(
- __int64_t fields, /* bitmask of fields */
+ int64_t fields, /* bitmask of fields */
const short *offsets, /* table of field offsets */
int nbits, /* number of bits to inspect */
int *first, /* output: first byte offset */
int *last) /* output: last byte offset */
{
int i; /* current bit number */
- __int64_t imask; /* mask for current bit number */
+ int64_t imask; /* mask for current bit number */
ASSERT(fields != 0);
/*
@@ -810,7 +824,8 @@ xfs_btree_read_bufl(
xfs_daddr_t d; /* real disk block address */
int error;
- ASSERT(fsbno != NULLFSBLOCK);
+ if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
+ return -EFSCORRUPTED;
d = XFS_FSB_TO_DADDR(mp, fsbno);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
mp->m_bsize, lock, &bp, ops);
@@ -1084,12 +1099,15 @@ xfs_btree_init_block_int(
struct xfs_mount *mp,
struct xfs_btree_block *buf,
xfs_daddr_t blkno,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
unsigned int flags)
{
+ int crc = xfs_sb_version_hascrc(&mp->m_sb);
+ __u32 magic = xfs_btree_magic(crc, btnum);
+
buf->bb_magic = cpu_to_be32(magic);
buf->bb_level = cpu_to_be16(level);
buf->bb_numrecs = cpu_to_be16(numrecs);
@@ -1097,7 +1115,7 @@ xfs_btree_init_block_int(
if (flags & XFS_BTREE_LONG_PTRS) {
buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
- if (flags & XFS_BTREE_CRC_BLOCKS) {
+ if (crc) {
buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
buf->bb_u.l.bb_owner = cpu_to_be64(owner);
uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
@@ -1110,7 +1128,7 @@ xfs_btree_init_block_int(
buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- if (flags & XFS_BTREE_CRC_BLOCKS) {
+ if (crc) {
buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
@@ -1123,14 +1141,14 @@ void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
unsigned int flags)
{
xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- magic, level, numrecs, owner, flags);
+ btnum, level, numrecs, owner, flags);
}
STATIC void
@@ -1140,7 +1158,7 @@ xfs_btree_init_block_cur(
int level,
int numrecs)
{
- __u64 owner;
+ __u64 owner;
/*
* we can pull the owner from the cursor right now as the different
@@ -1154,7 +1172,7 @@ xfs_btree_init_block_cur(
owner = cur->bc_private.a.agno;
xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- xfs_btree_magic(cur), level, numrecs,
+ cur->bc_btnum, level, numrecs,
owner, cur->bc_flags);
}
@@ -1736,7 +1754,7 @@ error0:
return error;
}
-STATIC int
+int
xfs_btree_lookup_get_block(
struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in the btree */
@@ -1826,7 +1844,7 @@ xfs_btree_lookup(
int *stat) /* success/failure */
{
struct xfs_btree_block *block; /* current btree block */
- __int64_t diff; /* difference for the current key */
+ int64_t diff; /* difference for the current key */
int error; /* error return value */
int keyno; /* current key number */
int level; /* level in the btree */
@@ -2866,7 +2884,7 @@ xfs_btree_split_worker(
struct xfs_btree_split_args *args = container_of(work,
struct xfs_btree_split_args, work);
unsigned long pflags;
- unsigned long new_pflags = PF_FSTRANS;
+ unsigned long new_pflags = PF_MEMALLOC_NOFS;
/*
* we are in a transaction context here, but may also be doing work
@@ -4375,7 +4393,7 @@ xfs_btree_visit_blocks(
xfs_btree_readahead_ptr(cur, ptr, 1);
/* save for the next iteration of the loop */
- lptr = *ptr;
+ xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
}
/* for each buffer in the level */
@@ -4415,7 +4433,7 @@ xfs_btree_visit_blocks(
* recovery completion writes the changes to disk.
*/
struct xfs_btree_block_change_owner_info {
- __uint64_t new_owner;
+ uint64_t new_owner;
struct list_head *buffer_list;
};
@@ -4461,7 +4479,7 @@ xfs_btree_block_change_owner(
int
xfs_btree_change_owner(
struct xfs_btree_cur *cur,
- __uint64_t new_owner,
+ uint64_t new_owner,
struct list_head *buffer_list)
{
struct xfs_btree_block_change_owner_info bbcoi;
@@ -4565,7 +4583,7 @@ xfs_btree_simple_query_range(
{
union xfs_btree_rec *recp;
union xfs_btree_key rec_key;
- __int64_t diff;
+ int64_t diff;
int stat;
bool firstrec = true;
int error;
@@ -4662,8 +4680,8 @@ xfs_btree_overlapped_query_range(
union xfs_btree_key *hkp;
union xfs_btree_rec *recp;
struct xfs_btree_block *block;
- __int64_t ldiff;
- __int64_t hdiff;
+ int64_t ldiff;
+ int64_t hdiff;
int level;
struct xfs_buf *bp;
int i;
@@ -4822,6 +4840,23 @@ xfs_btree_query_range(
fn, priv);
}
+/* Query a btree for all records. */
+int
+xfs_btree_query_all(
+ struct xfs_btree_cur *cur,
+ xfs_btree_query_range_fn fn,
+ void *priv)
+{
+ union xfs_btree_key low_key;
+ union xfs_btree_key high_key;
+
+ memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
+ memset(&low_key, 0, sizeof(low_key));
+ memset(&high_key, 0xFF, sizeof(high_key));
+
+ return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
+}
+
/*
* Calculate the number of blocks needed to store a given number of records
* in a short-format (per-AG metadata) btree.
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b69b947c4c1b..9c95e965cfe5 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -76,6 +76,8 @@ union xfs_btree_rec {
#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
#define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi)
+uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
+
/*
* For logging record fields.
*/
@@ -148,20 +150,19 @@ struct xfs_btree_ops {
union xfs_btree_rec *rec);
/* difference between key value and cursor value */
- __int64_t (*key_diff)(struct xfs_btree_cur *cur,
+ int64_t (*key_diff)(struct xfs_btree_cur *cur,
union xfs_btree_key *key);
/*
* Difference between key2 and key1 -- positive if key1 > key2,
* negative if key1 < key2, and zero if equal.
*/
- __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
+ int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
union xfs_btree_key *key1,
union xfs_btree_key *key2);
const struct xfs_buf_ops *buf_ops;
-#if defined(DEBUG) || defined(XFS_WARN)
/* check that k1 is lower than k2 */
int (*keys_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
@@ -171,7 +172,6 @@ struct xfs_btree_ops {
int (*recs_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2);
-#endif
};
/*
@@ -211,11 +211,11 @@ typedef struct xfs_btree_cur
union xfs_btree_irec bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
- __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
+ uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
#define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */
#define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */
- __uint8_t bc_nlevels; /* number of levels in the tree */
- __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
+ uint8_t bc_nlevels; /* number of levels in the tree */
+ uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
xfs_btnum_t bc_btnum; /* identifies which btree type */
int bc_statoff; /* offset of btre stats array */
union {
@@ -328,7 +328,7 @@ xfs_btree_islastblock(
*/
void
xfs_btree_offsets(
- __int64_t fields, /* bitmask of fields */
+ int64_t fields, /* bitmask of fields */
const short *offsets,/* table of field offsets */
int nbits, /* number of bits to inspect */
int *first, /* output: first byte offset */
@@ -378,7 +378,7 @@ void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
@@ -389,7 +389,7 @@ xfs_btree_init_block_int(
struct xfs_mount *mp,
struct xfs_btree_block *buf,
xfs_daddr_t blkno,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
@@ -406,7 +406,7 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
int xfs_btree_insert(struct xfs_btree_cur *, int *);
int xfs_btree_delete(struct xfs_btree_cur *, int *);
int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
-int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
+int xfs_btree_change_owner(struct xfs_btree_cur *cur, uint64_t new_owner,
struct list_head *buffer_list);
/*
@@ -432,7 +432,7 @@ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
}
static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
- __uint16_t numrecs)
+ uint16_t numrecs)
{
block->bb_numrecs = cpu_to_be16(numrecs);
}
@@ -456,7 +456,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
#define XFS_FSB_SANITY_CHECK(mp,fsb) \
- (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
+ (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
/*
@@ -494,6 +494,8 @@ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
int xfs_btree_query_range(struct xfs_btree_cur *cur,
union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
xfs_btree_query_range_fn fn, void *priv);
+int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn,
+ void *priv);
typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
void *data);
@@ -502,4 +504,17 @@ int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
+union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_key_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
+ union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
+struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
+ int level, struct xfs_buf **bpp);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index a416c7cb23ea..8211f48b98e6 100644
--- a/fs/xfs/libxfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h
@@ -1,7 +1,7 @@
#ifndef _XFS_CKSUM_H
#define _XFS_CKSUM_H 1
-#define XFS_CRC_SEED (~(__uint32_t)0)
+#define XFS_CRC_SEED (~(uint32_t)0)
/*
* Calculate the intermediate checksum for a buffer that has the CRC field
@@ -9,11 +9,11 @@
* cksum_offset parameter. We do not modify the buffer during verification,
* hence we have to split the CRC calculation across the cksum_offset.
*/
-static inline __uint32_t
+static inline uint32_t
xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t zero = 0;
- __uint32_t crc;
+ uint32_t zero = 0;
+ uint32_t crc;
/* Calculate CRC up to the checksum. */
crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset);
@@ -30,7 +30,7 @@ xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
* Fast CRC method where the buffer is modified. Callers must have exclusive
* access to the buffer while the calculation takes place.
*/
-static inline __uint32_t
+static inline uint32_t
xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
{
/* zero the CRC field */
@@ -48,7 +48,7 @@ xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
* so that it is consistent on disk.
*/
static inline __le32
-xfs_end_cksum(__uint32_t crc)
+xfs_end_cksum(uint32_t crc)
{
return ~cpu_to_le32(crc);
}
@@ -62,7 +62,7 @@ xfs_end_cksum(__uint32_t crc)
static inline void
xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
+ uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
}
@@ -73,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
static inline int
xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
+ uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index f2dc1a950c85..6d4335815c3f 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -263,7 +263,7 @@ xfs_da3_node_read(
err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
which_fork, &xfs_da3_node_buf_ops);
- if (!err && tp) {
+ if (!err && tp && *bpp) {
struct xfs_da_blkinfo *info = (*bpp)->b_addr;
int type;
@@ -1282,7 +1282,7 @@ xfs_da3_fixhashpath(
return;
break;
case XFS_DIR2_LEAFN_MAGIC:
- lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
+ lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
if (count == 0)
return;
break;
@@ -1502,8 +1502,8 @@ xfs_da3_node_lookup_int(
if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
blk->magic == XFS_DIR3_LEAFN_MAGIC) {
blk->magic = XFS_DIR2_LEAFN_MAGIC;
- blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
- blk->bp, NULL);
+ blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+ blk->bp, NULL);
break;
}
@@ -1929,8 +1929,8 @@ xfs_da3_path_shift(
blk->magic = XFS_DIR2_LEAFN_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
- blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
- blk->bp, NULL);
+ blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+ blk->bp, NULL);
break;
default:
ASSERT(0);
@@ -1952,7 +1952,7 @@ xfs_da3_path_shift(
* This is implemented with some source-level loop unrolling.
*/
xfs_dahash_t
-xfs_da_hashname(const __uint8_t *name, int namelen)
+xfs_da_hashname(const uint8_t *name, int namelen)
{
xfs_dahash_t hash;
@@ -2633,7 +2633,7 @@ out_free:
/*
* Readahead the dir/attr block.
*/
-xfs_daddr_t
+int
xfs_da_reada_buf(
struct xfs_inode *dp,
xfs_dablk_t bno,
@@ -2664,7 +2664,5 @@ out_free:
if (mapp != &map)
kmem_free(mapp);
- if (error)
- return -1;
- return mappedbno;
+ return error;
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 98c75cbe6ac2..ae6de17467f2 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -60,10 +60,10 @@ enum xfs_dacmp {
*/
typedef struct xfs_da_args {
struct xfs_da_geometry *geo; /* da block geometry */
- const __uint8_t *name; /* string (maybe not NULL terminated) */
+ const uint8_t *name; /* string (maybe not NULL terminated) */
int namelen; /* length of string (maybe no NULL) */
- __uint8_t filetype; /* filetype of inode for directories */
- __uint8_t *value; /* set of bytes (maybe contain NULLs) */
+ uint8_t filetype; /* filetype of inode for directories */
+ uint8_t *value; /* set of bytes (maybe contain NULLs) */
int valuelen; /* length of value */
int flags; /* argument flags (eg: ATTR_NOCREATE) */
xfs_dahash_t hashval; /* hash value of name */
@@ -201,13 +201,13 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int whichfork,
const struct xfs_buf_ops *ops);
-xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
xfs_daddr_t mapped_bno, int whichfork,
const struct xfs_buf_ops *ops);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
struct xfs_buf *dead_buf);
-uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
+uint xfs_da_hashname(const uint8_t *name_string, int name_length);
enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
const unsigned char *name, int len);
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index f1e8d4dbb600..6d77d1a8498a 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -49,7 +49,7 @@ xfs_dir3_sf_entsize(
struct xfs_dir2_sf_hdr *hdr,
int len)
{
- return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
+ return xfs_dir2_sf_entsize(hdr, len) + sizeof(uint8_t);
}
static struct xfs_dir2_sf_entry *
@@ -77,7 +77,7 @@ xfs_dir3_sf_nextentry(
* not necessary. For non-filetype enable directories, the type is always
* unknown and we never store the value.
*/
-static __uint8_t
+static uint8_t
xfs_dir2_sfe_get_ftype(
struct xfs_dir2_sf_entry *sfep)
{
@@ -87,16 +87,16 @@ xfs_dir2_sfe_get_ftype(
static void
xfs_dir2_sfe_put_ftype(
struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype)
+ uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
}
-static __uint8_t
+static uint8_t
xfs_dir3_sfe_get_ftype(
struct xfs_dir2_sf_entry *sfep)
{
- __uint8_t ftype;
+ uint8_t ftype;
ftype = sfep->name[sfep->namelen];
if (ftype >= XFS_DIR3_FT_MAX)
@@ -107,7 +107,7 @@ xfs_dir3_sfe_get_ftype(
static void
xfs_dir3_sfe_put_ftype(
struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype)
+ uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
@@ -124,7 +124,7 @@ xfs_dir3_sfe_put_ftype(
static xfs_ino_t
xfs_dir2_sf_get_ino(
struct xfs_dir2_sf_hdr *hdr,
- __uint8_t *from)
+ uint8_t *from)
{
if (hdr->i8count)
return get_unaligned_be64(from) & 0x00ffffffffffffffULL;
@@ -135,7 +135,7 @@ xfs_dir2_sf_get_ino(
static void
xfs_dir2_sf_put_ino(
struct xfs_dir2_sf_hdr *hdr,
- __uint8_t *to,
+ uint8_t *to,
xfs_ino_t ino)
{
ASSERT((ino & 0xff00000000000000ULL) == 0);
@@ -225,7 +225,7 @@ xfs_dir3_sfe_put_ino(
#define XFS_DIR3_DATA_ENTSIZE(n) \
round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
- sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \
+ sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)), \
XFS_DIR2_DATA_ALIGN)
static int
@@ -242,7 +242,7 @@ xfs_dir3_data_entsize(
return XFS_DIR3_DATA_ENTSIZE(n);
}
-static __uint8_t
+static uint8_t
xfs_dir2_data_get_ftype(
struct xfs_dir2_data_entry *dep)
{
@@ -252,16 +252,16 @@ xfs_dir2_data_get_ftype(
static void
xfs_dir2_data_put_ftype(
struct xfs_dir2_data_entry *dep,
- __uint8_t ftype)
+ uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
}
-static __uint8_t
+static uint8_t
xfs_dir3_data_get_ftype(
struct xfs_dir2_data_entry *dep)
{
- __uint8_t ftype = dep->name[dep->namelen];
+ uint8_t ftype = dep->name[dep->namelen];
if (ftype >= XFS_DIR3_FT_MAX)
return XFS_DIR3_FT_UNKNOWN;
@@ -271,7 +271,7 @@ xfs_dir3_data_get_ftype(
static void
xfs_dir3_data_put_ftype(
struct xfs_dir2_data_entry *dep,
- __uint8_t type)
+ uint8_t type)
{
ASSERT(type < XFS_DIR3_FT_MAX);
ASSERT(dep->namelen != 0);
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 9a492a9e19bd..3771edcb301d 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -111,11 +111,11 @@ struct xfs_da3_intnode {
* appropriate.
*/
struct xfs_da3_icnode_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t level;
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t level;
};
/*
@@ -187,14 +187,14 @@ struct xfs_da3_icnode_hdr {
/*
* Byte offset in data block and shortform entry.
*/
-typedef __uint16_t xfs_dir2_data_off_t;
+typedef uint16_t xfs_dir2_data_off_t;
#define NULLDATAOFF 0xffffU
typedef uint xfs_dir2_data_aoff_t; /* argument form */
/*
* Offset in data space of a data entry.
*/
-typedef __uint32_t xfs_dir2_dataptr_t;
+typedef uint32_t xfs_dir2_dataptr_t;
#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
@@ -206,7 +206,7 @@ typedef xfs_off_t xfs_dir2_off_t;
/*
* Directory block number (logical dirblk in file)
*/
-typedef __uint32_t xfs_dir2_db_t;
+typedef uint32_t xfs_dir2_db_t;
#define XFS_INO32_SIZE 4
#define XFS_INO64_SIZE 8
@@ -226,9 +226,9 @@ typedef __uint32_t xfs_dir2_db_t;
* over them.
*/
typedef struct xfs_dir2_sf_hdr {
- __uint8_t count; /* count of entries */
- __uint8_t i8count; /* count of 8-byte inode #s */
- __uint8_t parent[8]; /* parent dir inode number */
+ uint8_t count; /* count of entries */
+ uint8_t i8count; /* count of 8-byte inode #s */
+ uint8_t parent[8]; /* parent dir inode number */
} __packed xfs_dir2_sf_hdr_t;
typedef struct xfs_dir2_sf_entry {
@@ -447,11 +447,11 @@ struct xfs_dir3_leaf_hdr {
};
struct xfs_dir3_icleaf_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t stale;
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t stale;
};
/*
@@ -538,10 +538,10 @@ struct xfs_dir3_free {
* xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
*/
struct xfs_dir3_icfree_hdr {
- __uint32_t magic;
- __uint32_t firstdb;
- __uint32_t nvalid;
- __uint32_t nused;
+ uint32_t magic;
+ uint32_t firstdb;
+ uint32_t nvalid;
+ uint32_t nused;
};
@@ -632,10 +632,10 @@ typedef struct xfs_attr_shortform {
__u8 padding;
} hdr;
struct xfs_attr_sf_entry {
- __uint8_t namelen; /* actual length of name (no NULL) */
- __uint8_t valuelen; /* actual length of value (no NULL) */
- __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
- __uint8_t nameval[1]; /* name & value bytes concatenated */
+ uint8_t namelen; /* actual length of name (no NULL) */
+ uint8_t valuelen; /* actual length of value (no NULL) */
+ uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
+ uint8_t nameval[1]; /* name & value bytes concatenated */
} list[1]; /* variable sized array */
} xfs_attr_shortform_t;
@@ -725,22 +725,22 @@ struct xfs_attr3_leafblock {
* incore, neutral version of the attribute leaf header
*/
struct xfs_attr3_icleaf_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t usedbytes;
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t usedbytes;
/*
* firstused is 32-bit here instead of 16-bit like the on-disk variant
* to support maximum fsb size of 64k without overflow issues throughout
* the attr code. Instead, the overflow condition is handled on
* conversion to/from disk.
*/
- __uint32_t firstused;
+ uint32_t firstused;
__u8 holes;
struct {
- __uint16_t base;
- __uint16_t size;
+ uint16_t base;
+ uint16_t size;
} freemap[XFS_ATTR_LEAF_MAPSIZE];
};
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 2f389d366e93..ccf9783fd3f0 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -218,8 +218,7 @@ xfs_dir_ino_validate(
agblkno != 0 &&
ioff < (1 << mp->m_sb.sb_inopblog) &&
XFS_AGINO_TO_INO(mp, agno, agino) == ino;
- if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
- XFS_RANDOM_DIR_INO_VALIDATE))) {
+ if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
xfs_warn(mp, "Invalid inode number 0x%Lx",
(unsigned long long) ino);
XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index d6e6d9d16f6c..21c8f8bf94d5 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -47,9 +47,9 @@ struct xfs_dir_ops {
struct xfs_dir2_sf_entry *
(*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep);
- __uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
+ uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype);
+ uint8_t ftype);
xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep);
void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr,
@@ -60,9 +60,9 @@ struct xfs_dir_ops {
xfs_ino_t ino);
int (*data_entsize)(int len);
- __uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
+ uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
void (*data_put_ftype)(struct xfs_dir2_data_entry *dep,
- __uint8_t ftype);
+ uint8_t ftype);
__be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep);
struct xfs_dir2_data_free *
(*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index aa17cb788946..43c902f7a68d 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -139,7 +139,7 @@ xfs_dir3_block_read(
err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
return err;
}
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index b887fb2a2bcf..27297a689d9c 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -145,7 +145,7 @@ xfs_dir3_leaf_check_int(
static bool
xfs_dir3_leaf_verify(
struct xfs_buf *bp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_leaf *leaf = bp->b_addr;
@@ -154,7 +154,7 @@ xfs_dir3_leaf_verify(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
- __uint16_t magic3;
+ uint16_t magic3;
magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
: XFS_DIR3_LEAFN_MAGIC;
@@ -178,7 +178,7 @@ xfs_dir3_leaf_verify(
static void
__read_verify(
struct xfs_buf *bp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
@@ -195,7 +195,7 @@ __read_verify(
static void
__write_verify(
struct xfs_buf *bp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_buf_log_item *bip = bp->b_fspriv;
@@ -256,7 +256,7 @@ const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
.verify_write = xfs_dir3_leafn_write_verify,
};
-static int
+int
xfs_dir3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
@@ -268,7 +268,7 @@ xfs_dir3_leaf_read(
err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
return err;
}
@@ -285,7 +285,7 @@ xfs_dir3_leafn_read(
err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
return err;
}
@@ -299,7 +299,7 @@ xfs_dir3_leaf_init(
struct xfs_trans *tp,
struct xfs_buf *bp,
xfs_ino_t owner,
- __uint16_t type)
+ uint16_t type)
{
struct xfs_dir2_leaf *leaf = bp->b_addr;
@@ -343,7 +343,7 @@ xfs_dir3_leaf_get_buf(
xfs_da_args_t *args,
xfs_dir2_db_t bno,
struct xfs_buf **bpp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_inode *dp = args->dp;
struct xfs_trans *tp = args->trans;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 75a557432d0f..682e2bf370c7 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
.verify_write = xfs_dir3_free_write_verify,
};
+/* Everything ok in the free block header? */
+static bool
+xfs_dir3_free_header_check(
+ struct xfs_inode *dp,
+ xfs_dablk_t fbno,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ unsigned int firstdb;
+ int maxbests;
+
+ maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo);
+ firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
+ xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
+ maxbests;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
+
+ if (be32_to_cpu(hdr3->firstdb) != firstdb)
+ return false;
+ if (be32_to_cpu(hdr3->nvalid) > maxbests)
+ return false;
+ if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
+ return false;
+ } else {
+ struct xfs_dir2_free_hdr *hdr = bp->b_addr;
+
+ if (be32_to_cpu(hdr->firstdb) != firstdb)
+ return false;
+ if (be32_to_cpu(hdr->nvalid) > maxbests)
+ return false;
+ if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
+ return false;
+ }
+ return true;
+}
static int
__xfs_dir3_free_read(
@@ -168,11 +204,22 @@ __xfs_dir3_free_read(
err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
+ if (err || !*bpp)
+ return err;
+
+ /* Check things that we can't do in the verifier. */
+ if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
+ xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
+ xfs_verifier_error(*bpp);
+ xfs_trans_brelse(tp, *bpp);
+ return -EFSCORRUPTED;
+ }
/* try read returns without an error or *bpp if it lands in a hole */
- if (!err && tp && *bpp)
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
- return err;
+
+ return 0;
}
int
@@ -481,7 +528,7 @@ xfs_dir2_free_hdr_check(
* Stale entries are ok.
*/
xfs_dahash_t /* hash value */
-xfs_dir2_leafn_lasthash(
+xfs_dir2_leaf_lasthash(
struct xfs_inode *dp,
struct xfs_buf *bp, /* leaf buffer */
int *count) /* count of entries in leaf */
@@ -493,7 +540,9 @@ xfs_dir2_leafn_lasthash(
dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
- leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
+ leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
+ leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
+ leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
if (count)
*count = leafhdr.count;
@@ -1358,8 +1407,8 @@ xfs_dir2_leafn_split(
/*
* Update last hashval in each block since we added the name.
*/
- oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL);
- newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL);
+ oldblk->hashval = xfs_dir2_leaf_lasthash(dp, oldblk->bp, NULL);
+ newblk->hashval = xfs_dir2_leaf_lasthash(dp, newblk->bp, NULL);
xfs_dir3_leaf_check(dp, oldblk->bp);
xfs_dir3_leaf_check(dp, newblk->bp);
return error;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index d04547fcf274..4badd26c47e6 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -58,6 +58,8 @@ extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
struct xfs_buf **bpp);
/* xfs_dir2_leaf.c */
+extern int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
@@ -69,7 +71,7 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
struct xfs_dir2_leaf_entry *ents, int *indexp,
int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
- struct xfs_buf **bpp, __uint16_t magic);
+ struct xfs_buf **bpp, uint16_t magic);
extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
struct xfs_buf *bp, int first, int last);
extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
@@ -93,7 +95,7 @@ extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
/* xfs_dir2_node.c */
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
struct xfs_buf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
+extern xfs_dahash_t xfs_dir2_leaf_lasthash(struct xfs_inode *dp,
struct xfs_buf *bp, int *count);
extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
struct xfs_da_args *args, int *indexp,
@@ -125,9 +127,10 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
/* xfs_dir2_readdir.c */
-extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
- size_t bufsize);
+extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
+ struct dir_context *ctx, size_t bufsize);
#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index c6809ff41197..be8b9755f66a 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -629,6 +629,112 @@ xfs_dir2_sf_check(
}
#endif /* DEBUG */
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dir2_sf_hdr *sfp;
+ struct xfs_dir2_sf_entry *sfep;
+ struct xfs_dir2_sf_entry *next_sfep;
+ char *endp;
+ const struct xfs_dir_ops *dops;
+ struct xfs_ifork *ifp;
+ xfs_ino_t ino;
+ int i;
+ int i8count;
+ int offset;
+ int size;
+ int error;
+ uint8_t filetype;
+
+ ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+ /*
+ * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops,
+ * so we can only trust the mountpoint to have the right pointer.
+ */
+ dops = xfs_dir_get_ops(mp, NULL);
+
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
+ size = ifp->if_bytes;
+
+ /*
+ * Give up if the directory is way too short.
+ */
+ if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
+ size < xfs_dir2_sf_hdr_size(sfp->i8count))
+ return -EFSCORRUPTED;
+
+ endp = (char *)sfp + size;
+
+ /* Check .. entry */
+ ino = dops->sf_get_parent_ino(sfp);
+ i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+ error = xfs_dir_ino_validate(mp, ino);
+ if (error)
+ return error;
+ offset = dops->data_first_offset;
+
+ /* Check all reported entries */
+ sfep = xfs_dir2_sf_firstentry(sfp);
+ for (i = 0; i < sfp->count; i++) {
+ /*
+ * struct xfs_dir2_sf_entry has a variable length.
+ * Check the fixed-offset parts of the structure are
+ * within the data buffer.
+ */
+ if (((char *)sfep + sizeof(*sfep)) >= endp)
+ return -EFSCORRUPTED;
+
+ /* Don't allow names with known bad length. */
+ if (sfep->namelen == 0)
+ return -EFSCORRUPTED;
+
+ /*
+ * Check that the variable-length part of the structure is
+ * within the data buffer. The next entry starts after the
+ * name component, so nextentry is an acceptable test.
+ */
+ next_sfep = dops->sf_nextentry(sfp, sfep);
+ if (endp < (char *)next_sfep)
+ return -EFSCORRUPTED;
+
+ /* Check that the offsets always increase. */
+ if (xfs_dir2_sf_get_offset(sfep) < offset)
+ return -EFSCORRUPTED;
+
+ /* Check the inode number. */
+ ino = dops->sf_get_ino(sfp, sfep);
+ i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+ error = xfs_dir_ino_validate(mp, ino);
+ if (error)
+ return error;
+
+ /* Check the file type. */
+ filetype = dops->sf_get_ftype(sfep);
+ if (filetype >= XFS_DIR3_FT_MAX)
+ return -EFSCORRUPTED;
+
+ offset = xfs_dir2_sf_get_offset(sfep) +
+ dops->data_entsize(sfep->namelen);
+
+ sfep = next_sfep;
+ }
+ if (i8count != sfp->i8count)
+ return -EFSCORRUPTED;
+ if ((void *)sfep != (void *)endp)
+ return -EFSCORRUPTED;
+
+ /* Make sure this whole thing ought to be in local format. */
+ if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+
/*
* Create a new (shortform) directory.
*/
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index ac9a003dd29a..747085b4ef44 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -35,13 +35,8 @@ int
xfs_calc_dquots_per_chunk(
unsigned int nbblks) /* basic block units */
{
- unsigned int ndquots;
-
ASSERT(nbblks > 0);
- ndquots = BBTOB(nbblks);
- do_div(ndquots, sizeof(xfs_dqblk_t));
-
- return ndquots;
+ return BBTOB(nbblks) / sizeof(xfs_dqblk_t);
}
/*
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 6b7579e7b60a..23229f0c5b15 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -103,8 +103,8 @@ struct xfs_ifork;
* Must be padded to 64 bit alignment.
*/
typedef struct xfs_sb {
- __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
- __uint32_t sb_blocksize; /* logical block size, bytes */
+ uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
+ uint32_t sb_blocksize; /* logical block size, bytes */
xfs_rfsblock_t sb_dblocks; /* number of data blocks */
xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
xfs_rtblock_t sb_rextents; /* number of realtime extents */
@@ -118,45 +118,45 @@ typedef struct xfs_sb {
xfs_agnumber_t sb_agcount; /* number of allocation groups */
xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */
xfs_extlen_t sb_logblocks; /* number of log blocks */
- __uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */
- __uint16_t sb_sectsize; /* volume sector size, bytes */
- __uint16_t sb_inodesize; /* inode size, bytes */
- __uint16_t sb_inopblock; /* inodes per block */
+ uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */
+ uint16_t sb_sectsize; /* volume sector size, bytes */
+ uint16_t sb_inodesize; /* inode size, bytes */
+ uint16_t sb_inopblock; /* inodes per block */
char sb_fname[12]; /* file system name */
- __uint8_t sb_blocklog; /* log2 of sb_blocksize */
- __uint8_t sb_sectlog; /* log2 of sb_sectsize */
- __uint8_t sb_inodelog; /* log2 of sb_inodesize */
- __uint8_t sb_inopblog; /* log2 of sb_inopblock */
- __uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */
- __uint8_t sb_rextslog; /* log2 of sb_rextents */
- __uint8_t sb_inprogress; /* mkfs is in progress, don't mount */
- __uint8_t sb_imax_pct; /* max % of fs for inode space */
+ uint8_t sb_blocklog; /* log2 of sb_blocksize */
+ uint8_t sb_sectlog; /* log2 of sb_sectsize */
+ uint8_t sb_inodelog; /* log2 of sb_inodesize */
+ uint8_t sb_inopblog; /* log2 of sb_inopblock */
+ uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */
+ uint8_t sb_rextslog; /* log2 of sb_rextents */
+ uint8_t sb_inprogress; /* mkfs is in progress, don't mount */
+ uint8_t sb_imax_pct; /* max % of fs for inode space */
/* statistics */
/*
* These fields must remain contiguous. If you really
* want to change their layout, make sure you fix the
* code in xfs_trans_apply_sb_deltas().
*/
- __uint64_t sb_icount; /* allocated inodes */
- __uint64_t sb_ifree; /* free inodes */
- __uint64_t sb_fdblocks; /* free data blocks */
- __uint64_t sb_frextents; /* free realtime extents */
+ uint64_t sb_icount; /* allocated inodes */
+ uint64_t sb_ifree; /* free inodes */
+ uint64_t sb_fdblocks; /* free data blocks */
+ uint64_t sb_frextents; /* free realtime extents */
/*
* End contiguous fields.
*/
xfs_ino_t sb_uquotino; /* user quota inode */
xfs_ino_t sb_gquotino; /* group quota inode */
- __uint16_t sb_qflags; /* quota flags */
- __uint8_t sb_flags; /* misc. flags */
- __uint8_t sb_shared_vn; /* shared version number */
+ uint16_t sb_qflags; /* quota flags */
+ uint8_t sb_flags; /* misc. flags */
+ uint8_t sb_shared_vn; /* shared version number */
xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */
- __uint32_t sb_unit; /* stripe or raid unit */
- __uint32_t sb_width; /* stripe or raid width */
- __uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */
- __uint8_t sb_logsectlog; /* log2 of the log sector size */
- __uint16_t sb_logsectsize; /* sector size for the log, bytes */
- __uint32_t sb_logsunit; /* stripe unit size for the log */
- __uint32_t sb_features2; /* additional feature bits */
+ uint32_t sb_unit; /* stripe or raid unit */
+ uint32_t sb_width; /* stripe or raid width */
+ uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */
+ uint8_t sb_logsectlog; /* log2 of the log sector size */
+ uint16_t sb_logsectsize; /* sector size for the log, bytes */
+ uint32_t sb_logsunit; /* stripe unit size for the log */
+ uint32_t sb_features2; /* additional feature bits */
/*
* bad features2 field as a result of failing to pad the sb structure to
@@ -167,17 +167,17 @@ typedef struct xfs_sb {
* the value in sb_features2 when formatting the incore superblock to
* the disk buffer.
*/
- __uint32_t sb_bad_features2;
+ uint32_t sb_bad_features2;
/* version 5 superblock fields start here */
/* feature masks */
- __uint32_t sb_features_compat;
- __uint32_t sb_features_ro_compat;
- __uint32_t sb_features_incompat;
- __uint32_t sb_features_log_incompat;
+ uint32_t sb_features_compat;
+ uint32_t sb_features_ro_compat;
+ uint32_t sb_features_incompat;
+ uint32_t sb_features_log_incompat;
- __uint32_t sb_crc; /* superblock crc */
+ uint32_t sb_crc; /* superblock crc */
xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */
xfs_ino_t sb_pquotino; /* project quota inode */
@@ -449,7 +449,7 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
static inline bool
xfs_sb_has_compat_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_compat & feature) != 0;
}
@@ -465,7 +465,7 @@ xfs_sb_has_compat_feature(
static inline bool
xfs_sb_has_ro_compat_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_ro_compat & feature) != 0;
}
@@ -482,7 +482,7 @@ xfs_sb_has_ro_compat_feature(
static inline bool
xfs_sb_has_incompat_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_incompat & feature) != 0;
}
@@ -492,7 +492,7 @@ xfs_sb_has_incompat_feature(
static inline bool
xfs_sb_has_incompat_log_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_log_incompat & feature) != 0;
}
@@ -594,8 +594,8 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
*/
#define XFS_FSB_TO_B(mp,fsbno) ((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog)
#define XFS_B_TO_FSB(mp,b) \
- ((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
-#define XFS_B_TO_FSBT(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
+ ((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
+#define XFS_B_TO_FSBT(mp,b) (((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask)
/*
@@ -930,10 +930,8 @@ static inline uint xfs_dinode_size(int version)
/*
* The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
* Since the pathconf interface is signed, we use 2^31 - 1 instead.
- * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
*/
#define XFS_MAXLINK ((1U << 31) - 1U)
-#define XFS_MAXLINK_1 65535U
/*
* Values for di_format
@@ -1074,7 +1072,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
* next agno_log bits - ag number
* high agno_log-agblklog-inopblog bits - 0
*/
-#define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1)
+#define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1)
#define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog
#define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog
#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log
@@ -1213,6 +1211,7 @@ struct xfs_dsymlink_hdr {
#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
+#define XFS_SYMLINK_MAXLEN 1024
/*
* The maximum pathlen is 1024 bytes. Since the minimum file system
* blocksize is 512 bytes, we can get a max of 3 extents back from
@@ -1271,16 +1270,16 @@ typedef __be32 xfs_alloc_ptr_t;
#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
-typedef __uint64_t xfs_inofree_t;
+typedef uint64_t xfs_inofree_t;
#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3)
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */
-#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))
+#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(uint16_t))
#define XFS_INODES_PER_HOLEMASK_BIT \
- (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
+ (XFS_INODES_PER_CHUNK / (NBBY * sizeof(uint16_t)))
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
{
@@ -1314,9 +1313,9 @@ typedef struct xfs_inobt_rec {
typedef struct xfs_inobt_rec_incore {
xfs_agino_t ir_startino; /* starting inode number */
- __uint16_t ir_holemask; /* hole mask for sparse chunks */
- __uint8_t ir_count; /* total inode count */
- __uint8_t ir_freecount; /* count of free inodes (set bits) */
+ uint16_t ir_holemask; /* hole mask for sparse chunks */
+ uint8_t ir_count; /* total inode count */
+ uint8_t ir_freecount; /* count of free inodes (set bits) */
xfs_inofree_t ir_free; /* free inode mask */
} xfs_inobt_rec_incore_t;
@@ -1399,15 +1398,15 @@ struct xfs_rmap_rec {
* rm_offset:54-60 aren't used and should be zero
* rm_offset:0-53 is the block offset within the inode
*/
-#define XFS_RMAP_OFF_ATTR_FORK ((__uint64_t)1ULL << 63)
-#define XFS_RMAP_OFF_BMBT_BLOCK ((__uint64_t)1ULL << 62)
-#define XFS_RMAP_OFF_UNWRITTEN ((__uint64_t)1ULL << 61)
+#define XFS_RMAP_OFF_ATTR_FORK ((uint64_t)1ULL << 63)
+#define XFS_RMAP_OFF_BMBT_BLOCK ((uint64_t)1ULL << 62)
+#define XFS_RMAP_OFF_UNWRITTEN ((uint64_t)1ULL << 61)
-#define XFS_RMAP_LEN_MAX ((__uint32_t)~0U)
+#define XFS_RMAP_LEN_MAX ((uint32_t)~0U)
#define XFS_RMAP_OFF_FLAGS (XFS_RMAP_OFF_ATTR_FORK | \
XFS_RMAP_OFF_BMBT_BLOCK | \
XFS_RMAP_OFF_UNWRITTEN)
-#define XFS_RMAP_OFF_MASK ((__uint64_t)0x3FFFFFFFFFFFFFULL)
+#define XFS_RMAP_OFF_MASK ((uint64_t)0x3FFFFFFFFFFFFFULL)
#define XFS_RMAP_OFF(off) ((off) & XFS_RMAP_OFF_MASK)
@@ -1433,8 +1432,8 @@ struct xfs_rmap_rec {
struct xfs_rmap_irec {
xfs_agblock_t rm_startblock; /* extent start block */
xfs_extlen_t rm_blockcount; /* extent length */
- __uint64_t rm_owner; /* extent owner */
- __uint64_t rm_offset; /* offset within the owner */
+ uint64_t rm_owner; /* extent owner */
+ uint64_t rm_offset; /* offset within the owner */
unsigned int rm_flags; /* state flags */
};
@@ -1546,11 +1545,11 @@ typedef struct xfs_bmbt_rec {
__be64 l0, l1;
} xfs_bmbt_rec_t;
-typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
+typedef uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
typedef struct xfs_bmbt_rec_host {
- __uint64_t l0, l1;
+ uint64_t l0, l1;
} xfs_bmbt_rec_host_t;
/*
@@ -1578,19 +1577,10 @@ static inline xfs_filblks_t startblockval(xfs_fsblock_t x)
}
/*
- * Possible extent formats.
- */
-typedef enum {
- XFS_EXTFMT_NOSTATE = 0,
- XFS_EXTFMT_HASSTATE
-} xfs_exntfmt_t;
-
-/*
* Possible extent states.
*/
typedef enum {
XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
- XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID
} xfs_exntst_t;
/*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b72dc821d78b..8c61f21535d4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -92,6 +92,18 @@ struct getbmapx {
#define BMV_OF_LAST 0x4 /* segment is the last in the file */
#define BMV_OF_SHARED 0x8 /* segment shared with another file */
+/* fmr_owner special values for FS_IOC_GETFSMAP */
+#define XFS_FMR_OWN_FREE FMR_OWN_FREE /* free space */
+#define XFS_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */
+#define XFS_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */
+#define XFS_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */
+#define XFS_FMR_OWN_AG FMR_OWNER('X', 3) /* per-AG metadata */
+#define XFS_FMR_OWN_INOBT FMR_OWNER('X', 4) /* inode btree blocks */
+#define XFS_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */
+#define XFS_FMR_OWN_REFC FMR_OWNER('X', 6) /* refcount tree */
+#define XFS_FMR_OWN_COW FMR_OWNER('X', 7) /* cow staging */
+#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */
+
/*
* Structure for XFS_IOC_FSSETDM.
* For use by backup and restore programs to set the XFS on-disk inode
@@ -290,10 +302,10 @@ typedef struct xfs_bstat {
* and using two 16bit values to hold new 32bit projid was choosen
* to retain compatibility with "old" filesystems).
*/
-static inline __uint32_t
+static inline uint32_t
bstat_get_projid(struct xfs_bstat *bs)
{
- return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
+ return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
}
/*
@@ -434,19 +446,15 @@ typedef struct xfs_handle {
} xfs_handle_t;
#define ha_fsid ha_u._ha_fsid
-#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.fid_pad \
- - (char *) &(handle)) \
- + (handle).ha_fid.fid_len)
-
/*
* Structure passed to XFS_IOC_SWAPEXT
*/
typedef struct xfs_swapext
{
- __int64_t sx_version; /* version */
+ int64_t sx_version; /* version */
#define XFS_SX_VERSION 0
- __int64_t sx_fdtarget; /* fd of target file */
- __int64_t sx_fdtmp; /* fd of tmp file */
+ int64_t sx_fdtarget; /* fd of target file */
+ int64_t sx_fdtmp; /* fd of tmp file */
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
@@ -502,6 +510,7 @@ typedef struct xfs_swapext
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
+/* XFS_IOC_GETFSMAP ------ hoisted 59 */
/*
* ioctl commands that replace IRIX syssgi()'s
@@ -533,7 +542,7 @@ typedef struct xfs_swapext
#define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom)
-#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t)
+#define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index f272abff11e1..ffd5a15d1bb6 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -46,13 +46,12 @@
/*
* Allocation group level functions.
*/
-static inline int
+int
xfs_ialloc_cluster_alignment(
struct xfs_mount *mp)
{
if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
return mp->m_sb.sb_inoalignmt;
return 1;
}
@@ -99,24 +98,15 @@ xfs_inobt_update(
return xfs_btree_update(cur, &rec);
}
-/*
- * Get the data from the pointed-to record.
- */
-int /* error */
-xfs_inobt_get_rec(
- struct xfs_btree_cur *cur, /* btree cursor */
- xfs_inobt_rec_incore_t *irec, /* btree record */
- int *stat) /* output: success/failure */
+/* Convert on-disk btree record to incore inobt record. */
+void
+xfs_inobt_btrec_to_irec(
+ struct xfs_mount *mp,
+ union xfs_btree_rec *rec,
+ struct xfs_inobt_rec_incore *irec)
{
- union xfs_btree_rec *rec;
- int error;
-
- error = xfs_btree_get_rec(cur, &rec, stat);
- if (error || *stat == 0)
- return error;
-
irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
- if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
irec->ir_count = rec->inobt.ir_u.sp.ir_count;
irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
@@ -131,6 +121,25 @@ xfs_inobt_get_rec(
be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
}
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int
+xfs_inobt_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_inobt_rec_incore *irec,
+ int *stat)
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || *stat == 0)
+ return error;
+
+ xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec);
return 0;
}
@@ -141,9 +150,9 @@ xfs_inobt_get_rec(
STATIC int
xfs_inobt_insert_rec(
struct xfs_btree_cur *cur,
- __uint16_t holemask,
- __uint8_t count,
- __int32_t freecount,
+ uint16_t holemask,
+ uint8_t count,
+ int32_t freecount,
xfs_inofree_t free,
int *stat)
{
@@ -2543,8 +2552,7 @@ xfs_agi_read_verify(
!xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
- XFS_ERRTAG_IALLOC_READ_AGI,
- XFS_RANDOM_IALLOC_READ_AGI))
+ XFS_ERRTAG_IALLOC_READ_AGI))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 0bb89669fc07..b32cfb5aeb5b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -168,5 +168,10 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);
+union xfs_btree_rec;
+void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
+ struct xfs_inobt_rec_incore *irec);
+
+int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 7c471881c9a6..317caba9faa6 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -175,6 +175,18 @@ xfs_inobt_init_key_from_rec(
}
STATIC void
+xfs_inobt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ __u32 x;
+
+ x = be32_to_cpu(rec->inobt.ir_startino);
+ x += XFS_INODES_PER_CHUNK - 1;
+ key->inobt.ir_startino = cpu_to_be32(x);
+}
+
+STATIC void
xfs_inobt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
@@ -219,15 +231,25 @@ xfs_finobt_init_ptr_from_cur(
ptr->s = agi->agi_free_root;
}
-STATIC __int64_t
+STATIC int64_t
xfs_inobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
- return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
+ return (int64_t)be32_to_cpu(key->inobt.ir_startino) -
cur->bc_rec.i.ir_startino;
}
+STATIC int64_t
+xfs_inobt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (int64_t)be32_to_cpu(k1->inobt.ir_startino) -
+ be32_to_cpu(k2->inobt.ir_startino);
+}
+
static int
xfs_inobt_verify(
struct xfs_buf *bp)
@@ -302,7 +324,6 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
.verify_write = xfs_inobt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_inobt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -322,7 +343,6 @@ xfs_inobt_recs_inorder(
return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
be32_to_cpu(r2->inobt.ir_startino);
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_inobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
@@ -335,14 +355,14 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+ .diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
-#endif
};
static const struct xfs_btree_ops xfs_finobt_ops = {
@@ -356,14 +376,14 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+ .diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d93f9d918cfc..378f8fbc91a7 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -105,8 +105,7 @@ xfs_inode_buf_verify(
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
xfs_dinode_good_version(mp, dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
- XFS_ERRTAG_ITOBP_INOTOBP,
- XFS_RANDOM_ITOBP_INOTOBP))) {
+ XFS_ERRTAG_ITOBP_INOTOBP))) {
if (readahead) {
bp->b_flags &= ~XBF_DONE;
xfs_buf_ioerror(bp, -EIO);
@@ -381,7 +380,7 @@ xfs_log_dinode_to_disk(
}
}
-static bool
+bool
xfs_dinode_verify(
struct xfs_mount *mp,
xfs_ino_t ino,
@@ -444,7 +443,7 @@ xfs_dinode_calc_crc(
struct xfs_mount *mp,
struct xfs_dinode *dip)
{
- __uint32_t crc;
+ uint32_t crc;
if (dip->di_version < 3)
return;
@@ -508,7 +507,7 @@ xfs_iread(
/* even unallocated inodes are verified */
if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
- xfs_alert(mp, "%s: validation failed for inode %lld failed",
+ xfs_alert(mp, "%s: validation failed for inode %lld",
__func__, ip->i_ino);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6848a0afbce7..a9c97a356c30 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -28,26 +28,26 @@ struct xfs_dinode;
* format specific structures at the appropriate time.
*/
struct xfs_icdinode {
- __int8_t di_version; /* inode version */
- __int8_t di_format; /* format of di_c data */
- __uint16_t di_flushiter; /* incremented on flush */
- __uint32_t di_uid; /* owner's user id */
- __uint32_t di_gid; /* owner's group id */
- __uint16_t di_projid_lo; /* lower part of owner's project id */
- __uint16_t di_projid_hi; /* higher part of owner's project id */
+ int8_t di_version; /* inode version */
+ int8_t di_format; /* format of di_c data */
+ uint16_t di_flushiter; /* incremented on flush */
+ uint32_t di_uid; /* owner's user id */
+ uint32_t di_gid; /* owner's group id */
+ uint16_t di_projid_lo; /* lower part of owner's project id */
+ uint16_t di_projid_hi; /* higher part of owner's project id */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
- __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
- __int8_t di_aformat; /* format of attr fork's data */
- __uint32_t di_dmevmask; /* DMIG event mask */
- __uint16_t di_dmstate; /* DMIG state info */
- __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
+ uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
+ int8_t di_aformat; /* format of attr fork's data */
+ uint32_t di_dmevmask; /* DMIG event mask */
+ uint16_t di_dmstate; /* DMIG state info */
+ uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
- __uint64_t di_flags2; /* more random flags */
- __uint32_t di_cowextsize; /* basic cow extent size for file */
+ uint64_t di_flags2; /* more random flags */
+ uint32_t di_cowextsize; /* basic cow extent size for file */
xfs_ictimestamp_t di_crtime; /* time created */
};
@@ -82,4 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
+bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
+ struct xfs_dinode *dip);
+
#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 222e103356c6..0e80f34fe97c 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -26,12 +26,15 @@
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
+#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_ifork_zone;
@@ -39,35 +42,6 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
-#ifdef DEBUG
-/*
- * Make sure that the extents in the given memory buffer
- * are valid.
- */
-void
-xfs_validate_extents(
- xfs_ifork_t *ifp,
- int nrecs,
- xfs_exntfmt_t fmt)
-{
- xfs_bmbt_irec_t irec;
- xfs_bmbt_rec_host_t rec;
- int i;
-
- for (i = 0; i < nrecs; i++) {
- xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
- rec.l0 = get_unaligned(&ep->l0);
- rec.l1 = get_unaligned(&ep->l1);
- xfs_bmbt_get_all(&rec, &irec);
- if (fmt == XFS_EXTFMT_NOSTATE)
- ASSERT(irec.br_state == XFS_EXT_NORM);
- }
-}
-#else /* DEBUG */
-#define xfs_validate_extents(ifp, nrecs, fmt)
-#endif /* DEBUG */
-
-
/*
* Move inode type and inode format specific information from the
* on-disk inode to the in-core inode. For fifos, devs, and sockets
@@ -209,6 +183,16 @@ xfs_iformat_fork(
if (error)
return error;
+ /* Check inline dir contents. */
+ if (S_ISDIR(VFS_I(ip)->i_mode) &&
+ dip->di_format == XFS_DINODE_FMT_LOCAL) {
+ error = xfs_dir2_sf_verify(ip);
+ if (error) {
+ xfs_idestroy_fork(ip, XFS_DATA_FORK);
+ return error;
+ }
+ }
+
if (xfs_is_reflink_inode(ip)) {
ASSERT(ip->i_cowfp == NULL);
xfs_ifork_init_cow(ip);
@@ -319,7 +303,6 @@ xfs_iformat_local(
int whichfork,
int size)
{
-
/*
* If the size is unreasonable, then something
* is wrong and we just bail out rather than crash in
@@ -340,40 +323,33 @@ xfs_iformat_local(
}
/*
- * The file consists of a set of extents all
- * of which fit into the on-disk inode.
- * If there are few enough extents to fit into
- * the if_inline_ext, then copy them there.
- * Otherwise allocate a buffer for them and copy
- * them into it. Either way, set if_extents
- * to point at the extents.
+ * The file consists of a set of extents all of which fit into the on-disk
+ * inode. If there are few enough extents to fit into the if_inline_ext, then
+ * copy them there. Otherwise allocate a buffer for them and copy them into it.
+ * Either way, set if_extents to point at the extents.
*/
STATIC int
xfs_iformat_extents(
- xfs_inode_t *ip,
- xfs_dinode_t *dip,
- int whichfork)
+ struct xfs_inode *ip,
+ struct xfs_dinode *dip,
+ int whichfork)
{
- xfs_bmbt_rec_t *dp;
- xfs_ifork_t *ifp;
- int nex;
- int size;
- int i;
-
- ifp = XFS_IFORK_PTR(ip, whichfork);
- nex = XFS_DFORK_NEXTENTS(dip, whichfork);
- size = nex * (uint)sizeof(xfs_bmbt_rec_t);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ int nex = XFS_DFORK_NEXTENTS(dip, whichfork);
+ int size = nex * sizeof(xfs_bmbt_rec_t);
+ struct xfs_bmbt_rec *dp;
+ int i;
/*
- * If the number of extents is unreasonable, then something
- * is wrong and we just bail out rather than crash in
- * kmem_alloc() or memcpy() below.
+ * If the number of extents is unreasonable, then something is wrong and
+ * we just bail out rather than crash in kmem_alloc() or memcpy() below.
*/
- if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
+ if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
(unsigned long long) ip->i_ino, nex);
XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
+ mp, dip);
return -EFSCORRUPTED;
}
@@ -388,22 +364,17 @@ xfs_iformat_extents(
ifp->if_bytes = size;
if (size) {
dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
- xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
for (i = 0; i < nex; i++, dp++) {
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
ep->l0 = get_unaligned_be64(&dp->l0);
ep->l1 = get_unaligned_be64(&dp->l1);
+ if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) {
+ XFS_ERROR_REPORT("xfs_iformat_extents(2)",
+ XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
}
XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
- if (whichfork != XFS_DATA_FORK ||
- XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
- if (unlikely(xfs_check_nostate_extents(
- ifp, 0, nex))) {
- XFS_ERROR_REPORT("xfs_iformat_extents(2)",
- XFS_ERRLEVEL_LOW,
- ip->i_mount);
- return -EFSCORRUPTED;
- }
}
ifp->if_flags |= XFS_IFEXTENTS;
return 0;
@@ -429,11 +400,13 @@ xfs_iformat_btree(
/* REFERENCED */
int nrecs;
int size;
+ int level;
ifp = XFS_IFORK_PTR(ip, whichfork);
dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
size = XFS_BMAP_BROOT_SPACE(mp, dfp);
nrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);
/*
* blow out if -- fork has less extents than can fit in
@@ -446,7 +419,8 @@ xfs_iformat_btree(
XFS_IFORK_MAXEXT(ip, whichfork) ||
XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, mp, whichfork) ||
- XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+ XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
+ level == 0 || level > XFS_BTREE_MAXLEVELS) {
xfs_warn(mp, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
@@ -497,15 +471,13 @@ xfs_iread_extents(
* We know that the size is valid (it's checked in iformat_btree)
*/
ifp->if_bytes = ifp->if_real_bytes = 0;
- ifp->if_flags |= XFS_IFEXTENTS;
xfs_iext_add(ifp, 0, nextents);
error = xfs_bmap_read_extents(tp, ip, whichfork);
if (error) {
xfs_iext_destroy(ifp);
- ifp->if_flags &= ~XFS_IFEXTENTS;
return error;
}
- xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
+ ifp->if_flags |= XFS_IFEXTENTS;
return 0;
}
/*
@@ -823,6 +795,9 @@ xfs_iextents_copy(
copied = 0;
for (i = 0; i < nrecs; i++) {
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
+
+ ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep));
+
start_block = xfs_bmbt_get_startblock(ep);
if (isnullstartblock(start_block)) {
/*
@@ -838,7 +813,6 @@ xfs_iextents_copy(
copied++;
}
ASSERT(copied != 0);
- xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
return (copied * (uint)sizeof(xfs_bmbt_rec_t));
}
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 7ae571f8e34a..8372e9bcd7b6 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -31,7 +31,7 @@ struct xfs_trans_res;
* through all the log items definitions and everything they encode into the
* log.
*/
-typedef __uint32_t xlog_tid_t;
+typedef uint32_t xlog_tid_t;
#define XLOG_MIN_ICLOGS 2
#define XLOG_MAX_ICLOGS 8
@@ -211,7 +211,7 @@ typedef struct xfs_log_iovec {
typedef struct xfs_trans_header {
uint th_magic; /* magic number */
uint th_type; /* transaction type */
- __int32_t th_tid; /* transaction id (unused) */
+ int32_t th_tid; /* transaction id (unused) */
uint th_num_items; /* num items logged by trans */
} xfs_trans_header_t;
@@ -265,52 +265,52 @@ typedef struct xfs_trans_header {
* must be added on to the end.
*/
typedef struct xfs_inode_log_format {
- __uint16_t ilf_type; /* inode log item type */
- __uint16_t ilf_size; /* size of this item */
- __uint32_t ilf_fields; /* flags for fields logged */
- __uint16_t ilf_asize; /* size of attr d/ext/root */
- __uint16_t ilf_dsize; /* size of data/ext/root */
- __uint64_t ilf_ino; /* inode number */
+ uint16_t ilf_type; /* inode log item type */
+ uint16_t ilf_size; /* size of this item */
+ uint32_t ilf_fields; /* flags for fields logged */
+ uint16_t ilf_asize; /* size of attr d/ext/root */
+ uint16_t ilf_dsize; /* size of data/ext/root */
+ uint64_t ilf_ino; /* inode number */
union {
- __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* blkno of inode buffer */
- __int32_t ilf_len; /* len of inode buffer */
- __int32_t ilf_boffset; /* off of inode in buffer */
+ int64_t ilf_blkno; /* blkno of inode buffer */
+ int32_t ilf_len; /* len of inode buffer */
+ int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_t;
typedef struct xfs_inode_log_format_32 {
- __uint16_t ilf_type; /* inode log item type */
- __uint16_t ilf_size; /* size of this item */
- __uint32_t ilf_fields; /* flags for fields logged */
- __uint16_t ilf_asize; /* size of attr d/ext/root */
- __uint16_t ilf_dsize; /* size of data/ext/root */
- __uint64_t ilf_ino; /* inode number */
+ uint16_t ilf_type; /* inode log item type */
+ uint16_t ilf_size; /* size of this item */
+ uint32_t ilf_fields; /* flags for fields logged */
+ uint16_t ilf_asize; /* size of attr d/ext/root */
+ uint16_t ilf_dsize; /* size of data/ext/root */
+ uint64_t ilf_ino; /* inode number */
union {
- __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* blkno of inode buffer */
- __int32_t ilf_len; /* len of inode buffer */
- __int32_t ilf_boffset; /* off of inode in buffer */
+ int64_t ilf_blkno; /* blkno of inode buffer */
+ int32_t ilf_len; /* len of inode buffer */
+ int32_t ilf_boffset; /* off of inode in buffer */
} __attribute__((packed)) xfs_inode_log_format_32_t;
typedef struct xfs_inode_log_format_64 {
- __uint16_t ilf_type; /* inode log item type */
- __uint16_t ilf_size; /* size of this item */
- __uint32_t ilf_fields; /* flags for fields logged */
- __uint16_t ilf_asize; /* size of attr d/ext/root */
- __uint16_t ilf_dsize; /* size of data/ext/root */
- __uint32_t ilf_pad; /* pad for 64 bit boundary */
- __uint64_t ilf_ino; /* inode number */
+ uint16_t ilf_type; /* inode log item type */
+ uint16_t ilf_size; /* size of this item */
+ uint32_t ilf_fields; /* flags for fields logged */
+ uint16_t ilf_asize; /* size of attr d/ext/root */
+ uint16_t ilf_dsize; /* size of data/ext/root */
+ uint32_t ilf_pad; /* pad for 64 bit boundary */
+ uint64_t ilf_ino; /* inode number */
union {
- __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* blkno of inode buffer */
- __int32_t ilf_len; /* len of inode buffer */
- __int32_t ilf_boffset; /* off of inode in buffer */
+ int64_t ilf_blkno; /* blkno of inode buffer */
+ int32_t ilf_len; /* len of inode buffer */
+ int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_64_t;
@@ -379,8 +379,8 @@ static inline int xfs_ilog_fdata(int w)
* information.
*/
typedef struct xfs_ictimestamp {
- __int32_t t_sec; /* timestamp seconds */
- __int32_t t_nsec; /* timestamp nanoseconds */
+ int32_t t_sec; /* timestamp seconds */
+ int32_t t_nsec; /* timestamp nanoseconds */
} xfs_ictimestamp_t;
/*
@@ -388,18 +388,18 @@ typedef struct xfs_ictimestamp {
* kept identical to struct xfs_dinode except for the endianness annotations.
*/
struct xfs_log_dinode {
- __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
- __uint16_t di_mode; /* mode and type of file */
- __int8_t di_version; /* inode version */
- __int8_t di_format; /* format of di_c data */
- __uint8_t di_pad3[2]; /* unused in v2/3 inodes */
- __uint32_t di_uid; /* owner's user id */
- __uint32_t di_gid; /* owner's group id */
- __uint32_t di_nlink; /* number of links to file */
- __uint16_t di_projid_lo; /* lower part of owner's project id */
- __uint16_t di_projid_hi; /* higher part of owner's project id */
- __uint8_t di_pad[6]; /* unused, zeroed space */
- __uint16_t di_flushiter; /* incremented on flush */
+ uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
+ uint16_t di_mode; /* mode and type of file */
+ int8_t di_version; /* inode version */
+ int8_t di_format; /* format of di_c data */
+ uint8_t di_pad3[2]; /* unused in v2/3 inodes */
+ uint32_t di_uid; /* owner's user id */
+ uint32_t di_gid; /* owner's group id */
+ uint32_t di_nlink; /* number of links to file */
+ uint16_t di_projid_lo; /* lower part of owner's project id */
+ uint16_t di_projid_hi; /* higher part of owner's project id */
+ uint8_t di_pad[6]; /* unused, zeroed space */
+ uint16_t di_flushiter; /* incremented on flush */
xfs_ictimestamp_t di_atime; /* time last accessed */
xfs_ictimestamp_t di_mtime; /* time last modified */
xfs_ictimestamp_t di_ctime; /* time created/inode modified */
@@ -408,23 +408,23 @@ struct xfs_log_dinode {
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
- __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
- __int8_t di_aformat; /* format of attr fork's data */
- __uint32_t di_dmevmask; /* DMIG event mask */
- __uint16_t di_dmstate; /* DMIG state info */
- __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
- __uint32_t di_gen; /* generation number */
+ uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
+ int8_t di_aformat; /* format of attr fork's data */
+ uint32_t di_dmevmask; /* DMIG event mask */
+ uint16_t di_dmstate; /* DMIG state info */
+ uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
+ uint32_t di_gen; /* generation number */
/* di_next_unlinked is the only non-core field in the old dinode */
xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */
/* start of the extended dinode, writable fields */
- __uint32_t di_crc; /* CRC of the inode */
- __uint64_t di_changecount; /* number of attribute changes */
+ uint32_t di_crc; /* CRC of the inode */
+ uint64_t di_changecount; /* number of attribute changes */
xfs_lsn_t di_lsn; /* flush sequence */
- __uint64_t di_flags2; /* more random flags */
- __uint32_t di_cowextsize; /* basic cow extent size for file */
- __uint8_t di_pad2[12]; /* more padding for future expansion */
+ uint64_t di_flags2; /* more random flags */
+ uint32_t di_cowextsize; /* basic cow extent size for file */
+ uint8_t di_pad2[12]; /* more padding for future expansion */
/* fields only written to during inode creation */
xfs_ictimestamp_t di_crtime; /* time created */
@@ -483,7 +483,7 @@ typedef struct xfs_buf_log_format {
unsigned short blf_size; /* size of this item */
unsigned short blf_flags; /* misc state */
unsigned short blf_len; /* number of blocks in this buf */
- __int64_t blf_blkno; /* starting blkno of this buf */
+ int64_t blf_blkno; /* starting blkno of this buf */
unsigned int blf_map_size; /* used size of data bitmap in words */
unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
} xfs_buf_log_format_t;
@@ -533,7 +533,7 @@ xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
}
-static inline __uint16_t
+static inline uint16_t
xfs_blft_from_flags(struct xfs_buf_log_format *blf)
{
return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
@@ -554,14 +554,14 @@ typedef struct xfs_extent {
* conversion routine.
*/
typedef struct xfs_extent_32 {
- __uint64_t ext_start;
- __uint32_t ext_len;
+ uint64_t ext_start;
+ uint32_t ext_len;
} __attribute__((packed)) xfs_extent_32_t;
typedef struct xfs_extent_64 {
- __uint64_t ext_start;
- __uint32_t ext_len;
- __uint32_t ext_pad;
+ uint64_t ext_start;
+ uint32_t ext_len;
+ uint32_t ext_pad;
} xfs_extent_64_t;
/*
@@ -570,26 +570,26 @@ typedef struct xfs_extent_64 {
* size is given by efi_nextents.
*/
typedef struct xfs_efi_log_format {
- __uint16_t efi_type; /* efi log item type */
- __uint16_t efi_size; /* size of this item */
- __uint32_t efi_nextents; /* # extents to free */
- __uint64_t efi_id; /* efi identifier */
+ uint16_t efi_type; /* efi log item type */
+ uint16_t efi_size; /* size of this item */
+ uint32_t efi_nextents; /* # extents to free */
+ uint64_t efi_id; /* efi identifier */
xfs_extent_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_t;
typedef struct xfs_efi_log_format_32 {
- __uint16_t efi_type; /* efi log item type */
- __uint16_t efi_size; /* size of this item */
- __uint32_t efi_nextents; /* # extents to free */
- __uint64_t efi_id; /* efi identifier */
+ uint16_t efi_type; /* efi log item type */
+ uint16_t efi_size; /* size of this item */
+ uint32_t efi_nextents; /* # extents to free */
+ uint64_t efi_id; /* efi identifier */
xfs_extent_32_t efi_extents[1]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t;
typedef struct xfs_efi_log_format_64 {
- __uint16_t efi_type; /* efi log item type */
- __uint16_t efi_size; /* size of this item */
- __uint32_t efi_nextents; /* # extents to free */
- __uint64_t efi_id; /* efi identifier */
+ uint16_t efi_type; /* efi log item type */
+ uint16_t efi_size; /* size of this item */
+ uint32_t efi_nextents; /* # extents to free */
+ uint64_t efi_id; /* efi identifier */
xfs_extent_64_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_64_t;
@@ -599,26 +599,26 @@ typedef struct xfs_efi_log_format_64 {
* size is given by efd_nextents;
*/
typedef struct xfs_efd_log_format {
- __uint16_t efd_type; /* efd log item type */
- __uint16_t efd_size; /* size of this item */
- __uint32_t efd_nextents; /* # of extents freed */
- __uint64_t efd_efi_id; /* id of corresponding efi */
+ uint16_t efd_type; /* efd log item type */
+ uint16_t efd_size; /* size of this item */
+ uint32_t efd_nextents; /* # of extents freed */
+ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_t;
typedef struct xfs_efd_log_format_32 {
- __uint16_t efd_type; /* efd log item type */
- __uint16_t efd_size; /* size of this item */
- __uint32_t efd_nextents; /* # of extents freed */
- __uint64_t efd_efi_id; /* id of corresponding efi */
+ uint16_t efd_type; /* efd log item type */
+ uint16_t efd_size; /* size of this item */
+ uint32_t efd_nextents; /* # of extents freed */
+ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_32_t efd_extents[1]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t;
typedef struct xfs_efd_log_format_64 {
- __uint16_t efd_type; /* efd log item type */
- __uint16_t efd_size; /* size of this item */
- __uint32_t efd_nextents; /* # of extents freed */
- __uint64_t efd_efi_id; /* id of corresponding efi */
+ uint16_t efd_type; /* efd log item type */
+ uint16_t efd_size; /* size of this item */
+ uint32_t efd_nextents; /* # of extents freed */
+ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_64_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_64_t;
@@ -626,11 +626,11 @@ typedef struct xfs_efd_log_format_64 {
* RUI/RUD (reverse mapping) log format definitions
*/
struct xfs_map_extent {
- __uint64_t me_owner;
- __uint64_t me_startblock;
- __uint64_t me_startoff;
- __uint32_t me_len;
- __uint32_t me_flags;
+ uint64_t me_owner;
+ uint64_t me_startblock;
+ uint64_t me_startoff;
+ uint32_t me_len;
+ uint32_t me_flags;
};
/* rmap me_flags: upper bits are flags, lower byte is type code */
@@ -659,10 +659,10 @@ struct xfs_map_extent {
* size is given by rui_nextents.
*/
struct xfs_rui_log_format {
- __uint16_t rui_type; /* rui log item type */
- __uint16_t rui_size; /* size of this item */
- __uint32_t rui_nextents; /* # extents to free */
- __uint64_t rui_id; /* rui identifier */
+ uint16_t rui_type; /* rui log item type */
+ uint16_t rui_size; /* size of this item */
+ uint32_t rui_nextents; /* # extents to free */
+ uint64_t rui_id; /* rui identifier */
struct xfs_map_extent rui_extents[]; /* array of extents to rmap */
};
@@ -680,19 +680,19 @@ xfs_rui_log_format_sizeof(
* size is given by rud_nextents;
*/
struct xfs_rud_log_format {
- __uint16_t rud_type; /* rud log item type */
- __uint16_t rud_size; /* size of this item */
- __uint32_t __pad;
- __uint64_t rud_rui_id; /* id of corresponding rui */
+ uint16_t rud_type; /* rud log item type */
+ uint16_t rud_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t rud_rui_id; /* id of corresponding rui */
};
/*
* CUI/CUD (refcount update) log format definitions
*/
struct xfs_phys_extent {
- __uint64_t pe_startblock;
- __uint32_t pe_len;
- __uint32_t pe_flags;
+ uint64_t pe_startblock;
+ uint32_t pe_len;
+ uint32_t pe_flags;
};
/* refcount pe_flags: upper bits are flags, lower byte is type code */
@@ -707,10 +707,10 @@ struct xfs_phys_extent {
* size is given by cui_nextents.
*/
struct xfs_cui_log_format {
- __uint16_t cui_type; /* cui log item type */
- __uint16_t cui_size; /* size of this item */
- __uint32_t cui_nextents; /* # extents to free */
- __uint64_t cui_id; /* cui identifier */
+ uint16_t cui_type; /* cui log item type */
+ uint16_t cui_size; /* size of this item */
+ uint32_t cui_nextents; /* # extents to free */
+ uint64_t cui_id; /* cui identifier */
struct xfs_phys_extent cui_extents[]; /* array of extents */
};
@@ -728,10 +728,10 @@ xfs_cui_log_format_sizeof(
* size is given by cud_nextents;
*/
struct xfs_cud_log_format {
- __uint16_t cud_type; /* cud log item type */
- __uint16_t cud_size; /* size of this item */
- __uint32_t __pad;
- __uint64_t cud_cui_id; /* id of corresponding cui */
+ uint16_t cud_type; /* cud log item type */
+ uint16_t cud_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t cud_cui_id; /* id of corresponding cui */
};
/*
@@ -755,10 +755,10 @@ struct xfs_cud_log_format {
* size is given by bui_nextents.
*/
struct xfs_bui_log_format {
- __uint16_t bui_type; /* bui log item type */
- __uint16_t bui_size; /* size of this item */
- __uint32_t bui_nextents; /* # extents to free */
- __uint64_t bui_id; /* bui identifier */
+ uint16_t bui_type; /* bui log item type */
+ uint16_t bui_size; /* size of this item */
+ uint32_t bui_nextents; /* # extents to free */
+ uint64_t bui_id; /* bui identifier */
struct xfs_map_extent bui_extents[]; /* array of extents to bmap */
};
@@ -776,10 +776,10 @@ xfs_bui_log_format_sizeof(
* size is given by bud_nextents;
*/
struct xfs_bud_log_format {
- __uint16_t bud_type; /* bud log item type */
- __uint16_t bud_size; /* size of this item */
- __uint32_t __pad;
- __uint64_t bud_bui_id; /* id of corresponding bui */
+ uint16_t bud_type; /* bud log item type */
+ uint16_t bud_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t bud_bui_id; /* id of corresponding bui */
};
/*
@@ -789,12 +789,12 @@ struct xfs_bud_log_format {
* 32 bits : log_recovery code assumes that.
*/
typedef struct xfs_dq_logformat {
- __uint16_t qlf_type; /* dquot log item type */
- __uint16_t qlf_size; /* size of this item */
+ uint16_t qlf_type; /* dquot log item type */
+ uint16_t qlf_size; /* size of this item */
xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */
- __int64_t qlf_blkno; /* blkno of dquot buffer */
- __int32_t qlf_len; /* len of dquot buffer */
- __uint32_t qlf_boffset; /* off of dquot in buffer */
+ int64_t qlf_blkno; /* blkno of dquot buffer */
+ int32_t qlf_len; /* len of dquot buffer */
+ uint32_t qlf_boffset; /* off of dquot in buffer */
} xfs_dq_logformat_t;
/*
@@ -853,8 +853,8 @@ typedef struct xfs_qoff_logformat {
* decoding can be done correctly.
*/
struct xfs_icreate_log {
- __uint16_t icl_type; /* type of log format structure */
- __uint16_t icl_size; /* size of log format structure */
+ uint16_t icl_type; /* type of log format structure */
+ uint16_t icl_size; /* size of log format structure */
__be32 icl_ag; /* ag being allocated in */
__be32 icl_agbno; /* start block of inode range */
__be32 icl_count; /* number of inodes to initialise */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index d9f65e2d5cc8..66948a9fd486 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -26,7 +26,7 @@
#define XLOG_RHASH_SIZE 16
#define XLOG_RHASH_SHIFT 2
#define XLOG_RHASH(tid) \
- ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
+ ((((uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1)
@@ -42,7 +42,6 @@ typedef struct xlog_recover_item {
xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
} xlog_recover_item_t;
-struct xlog_tid;
typedef struct xlog_recover {
struct hlist_node r_list;
xlog_tid_t r_log_tid; /* log's transaction id */
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 8eed51275bb3..d69c772271cb 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -27,8 +27,8 @@
* they may need 64-bit accounting. Hence, 64-bit quota-counters,
* and quota-limits. This is a waste in the common case, but hey ...
*/
-typedef __uint64_t xfs_qcnt_t;
-typedef __uint16_t xfs_qwarncnt_t;
+typedef uint64_t xfs_qcnt_t;
+typedef uint16_t xfs_qwarncnt_t;
/*
* flags for q_flags field in the dquot.
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index b177ef33cd4c..900ea231f9a3 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -784,14 +784,6 @@ xfs_refcount_merge_extents(
}
/*
- * While we're adjusting the refcounts records of an extent, we have
- * to keep an eye on the number of extents we're dirtying -- run too
- * many in a single transaction and we'll exceed the transaction's
- * reservation and crash the fs. Each record adds 12 bytes to the
- * log (plus any key updates) so we'll conservatively assume 24 bytes
- * per record. We must also leave space for btree splits on both ends
- * of the range and space for the CUD and a new CUI.
- *
* XXX: This is a pretty hand-wavy estimate. The penalty for guessing
* true incorrectly is a shutdown FS; the penalty for guessing false
* incorrectly is more transaction rolls than might be necessary.
@@ -813,8 +805,7 @@ xfs_refcount_still_have_space(
*/
if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
XFS_TEST_ERROR(false, cur->bc_mp,
- XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE,
- XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE))
+ XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
return false;
if (cur->bc_private.a.priv.refc.nr_ops == 0)
@@ -822,7 +813,7 @@ xfs_refcount_still_have_space(
else if (overhead > cur->bc_tp->t_log_res)
return false;
return cur->bc_tp->t_log_res - overhead >
- cur->bc_private.a.priv.refc.nr_ops * 32;
+ cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}
/*
@@ -1076,8 +1067,7 @@ xfs_refcount_finish_one(
blockcount);
if (XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_REFCOUNT_FINISH_ONE,
- XFS_RANDOM_REFCOUNT_FINISH_ONE))
+ XFS_ERRTAG_REFCOUNT_FINISH_ONE))
return -EIO;
/*
@@ -1629,13 +1619,28 @@ xfs_refcount_recover_cow_leftovers(
if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
return -EOPNOTSUPP;
- error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ INIT_LIST_HEAD(&debris);
+
+ /*
+ * In this first part, we use an empty transaction to gather up
+ * all the leftover CoW extents so that we can subsequently
+ * delete them. The empty transaction is used to avoid
+ * a buffer lock deadlock if there happens to be a loop in the
+ * refcountbt because we're allowed to re-grab a buffer that is
+ * already attached to our transaction. When we're done
+ * recording the CoW debris we cancel the (empty) transaction
+ * and everything goes away cleanly.
+ */
+ error = xfs_trans_alloc_empty(mp, &tp);
if (error)
return error;
- cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (error)
+ goto out_trans;
+ cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
/* Find all the leftover CoW staging extents. */
- INIT_LIST_HEAD(&debris);
memset(&low, 0, sizeof(low));
memset(&high, 0, sizeof(high));
low.rc.rc_startblock = XFS_REFC_COW_START;
@@ -1645,10 +1650,11 @@ xfs_refcount_recover_cow_leftovers(
if (error)
goto out_cursor;
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
+ xfs_trans_brelse(tp, agbp);
+ xfs_trans_cancel(tp);
/* Now iterate the list to free the leftovers */
- list_for_each_entry(rr, &debris, rr_list) {
+ list_for_each_entry_safe(rr, n, &debris, rr_list) {
/* Set up transaction. */
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
if (error)
@@ -1676,8 +1682,16 @@ xfs_refcount_recover_cow_leftovers(
error = xfs_trans_commit(tp);
if (error)
goto out_free;
+
+ list_del(&rr->rr_list);
+ kmem_free(rr);
}
+ return error;
+out_defer:
+ xfs_defer_cancel(&dfops);
+out_trans:
+ xfs_trans_cancel(tp);
out_free:
/* Free the leftover list */
list_for_each_entry_safe(rr, n, &debris, rr_list) {
@@ -1688,11 +1702,6 @@ out_free:
out_cursor:
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- xfs_buf_relse(agbp);
- goto out_free;
-
-out_defer:
- xfs_defer_cancel(&dfops);
- xfs_trans_cancel(tp);
- goto out_free;
+ xfs_trans_brelse(tp, agbp);
+ goto out_trans;
}
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 098dc668ab2c..eafb9d1f3b37 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp,
extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
xfs_agnumber_t agno);
+/*
+ * While we're adjusting the refcounts records of an extent, we have
+ * to keep an eye on the number of extents we're dirtying -- run too
+ * many in a single transaction and we'll exceed the transaction's
+ * reservation and crash the fs. Each record adds 12 bytes to the
+ * log (plus any key updates) so we'll conservatively assume 32 bytes
+ * per record. We must also leave space for btree splits on both ends
+ * of the range and space for the CUD and a new CUI.
+ */
+#define XFS_REFCOUNT_ITEM_OVERHEAD 32
+
+static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
+{
+ return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
+}
+
#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 50add5272807..3c59dd3d58d7 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -202,7 +202,7 @@ xfs_refcountbt_init_ptr_from_cur(
ptr->s = agf->agf_refcount_root;
}
-STATIC __int64_t
+STATIC int64_t
xfs_refcountbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
@@ -210,16 +210,16 @@ xfs_refcountbt_key_diff(
struct xfs_refcount_irec *rec = &cur->bc_rec.rc;
struct xfs_refcount_key *kp = &key->refc;
- return (__int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+ return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
}
-STATIC __int64_t
+STATIC int64_t
xfs_refcountbt_diff_two_keys(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- return (__int64_t)be32_to_cpu(k1->refc.rc_startblock) -
+ return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
be32_to_cpu(k2->refc.rc_startblock);
}
@@ -285,7 +285,6 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
.verify_write = xfs_refcountbt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_refcountbt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -306,7 +305,6 @@ xfs_refcountbt_recs_inorder(
be32_to_cpu(r1->refc.rc_blockcount) <=
be32_to_cpu(r2->refc.rc_startblock);
}
-#endif
static const struct xfs_btree_ops xfs_refcountbt_ops = {
.rec_len = sizeof(struct xfs_refcount_rec),
@@ -325,10 +323,8 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = {
.key_diff = xfs_refcountbt_key_diff,
.buf_ops = &xfs_refcountbt_buf_ops,
.diff_two_keys = xfs_refcountbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_refcountbt_keys_inorder,
.recs_inorder = xfs_refcountbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3a8cc7139912..55c88a732690 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -179,7 +179,8 @@ done:
return error;
}
-static int
+/* Convert an internal btree record to an rmap record. */
+int
xfs_rmap_btrec_to_irec(
union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec)
@@ -2001,14 +2002,14 @@ xfs_rmap_query_range_helper(
/* Find all rmaps between two keys. */
int
xfs_rmap_query_range(
- struct xfs_btree_cur *cur,
- struct xfs_rmap_irec *low_rec,
- struct xfs_rmap_irec *high_rec,
- xfs_rmap_query_range_fn fn,
- void *priv)
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *low_rec,
+ struct xfs_rmap_irec *high_rec,
+ xfs_rmap_query_range_fn fn,
+ void *priv)
{
- union xfs_btree_irec low_brec;
- union xfs_btree_irec high_brec;
+ union xfs_btree_irec low_brec;
+ union xfs_btree_irec high_brec;
struct xfs_rmap_query_range_info query;
low_brec.r = *low_rec;
@@ -2019,6 +2020,20 @@ xfs_rmap_query_range(
xfs_rmap_query_range_helper, &query);
}
+/* Find all rmaps. */
+int
+xfs_rmap_query_all(
+ struct xfs_btree_cur *cur,
+ xfs_rmap_query_range_fn fn,
+ void *priv)
+{
+ struct xfs_rmap_query_range_info query;
+
+ query.priv = priv;
+ query.fn = fn;
+ return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
+}
+
/* Clean up after calling xfs_rmap_finish_one. */
void
xfs_rmap_finish_one_cleanup(
@@ -2047,7 +2062,7 @@ int
xfs_rmap_finish_one(
struct xfs_trans *tp,
enum xfs_rmap_intent_type type,
- __uint64_t owner,
+ uint64_t owner,
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
@@ -2072,8 +2087,7 @@ xfs_rmap_finish_one(
startoff, blockcount, state);
if (XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_RMAP_FINISH_ONE,
- XFS_RANDOM_RMAP_FINISH_ONE))
+ XFS_ERRTAG_RMAP_FINISH_ONE))
return -EIO;
/*
@@ -2168,7 +2182,7 @@ __xfs_rmap_add(
struct xfs_mount *mp,
struct xfs_defer_ops *dfops,
enum xfs_rmap_intent_type type,
- __uint64_t owner,
+ uint64_t owner,
int whichfork,
struct xfs_bmbt_irec *bmap)
{
@@ -2252,7 +2266,7 @@ xfs_rmap_alloc_extent(
xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
- __uint64_t owner)
+ uint64_t owner)
{
struct xfs_bmbt_irec bmap;
@@ -2276,7 +2290,7 @@ xfs_rmap_free_extent(
xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
- __uint64_t owner)
+ uint64_t owner)
{
struct xfs_bmbt_irec bmap;
@@ -2291,3 +2305,31 @@ xfs_rmap_free_extent(
return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
XFS_DATA_FORK, &bmap);
}
+
+/* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */
+int
+xfs_rmap_compare(
+ const struct xfs_rmap_irec *a,
+ const struct xfs_rmap_irec *b)
+{
+ __u64 oa;
+ __u64 ob;
+
+ oa = xfs_rmap_irec_offset_pack(a);
+ ob = xfs_rmap_irec_offset_pack(b);
+
+ if (a->rm_startblock < b->rm_startblock)
+ return -1;
+ else if (a->rm_startblock > b->rm_startblock)
+ return 1;
+ else if (a->rm_owner < b->rm_owner)
+ return -1;
+ else if (a->rm_owner > b->rm_owner)
+ return 1;
+ else if (oa < ob)
+ return -1;
+ else if (oa > ob)
+ return 1;
+ else
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 789930599339..466ede637080 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -162,6 +162,8 @@ typedef int (*xfs_rmap_query_range_fn)(
int xfs_rmap_query_range(struct xfs_btree_cur *cur,
struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
xfs_rmap_query_range_fn fn, void *priv);
+int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn,
+ void *priv);
enum xfs_rmap_intent_type {
XFS_RMAP_MAP,
@@ -177,7 +179,7 @@ enum xfs_rmap_intent_type {
struct xfs_rmap_intent {
struct list_head ri_list;
enum xfs_rmap_intent_type ri_type;
- __uint64_t ri_owner;
+ uint64_t ri_owner;
int ri_whichfork;
struct xfs_bmbt_irec ri_bmap;
};
@@ -194,15 +196,15 @@ int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
struct xfs_bmbt_irec *imap);
int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- __uint64_t owner);
+ uint64_t owner);
int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- __uint64_t owner);
+ uint64_t owner);
void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
struct xfs_btree_cur *rcur, int error);
int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
- __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+ uint64_t owner, int whichfork, xfs_fileoff_t startoff,
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
xfs_exntst_t state, struct xfs_btree_cur **pcur);
@@ -212,5 +214,10 @@ int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
uint64_t owner, uint64_t offset, unsigned int flags,
struct xfs_rmap_irec *irec, int *stat);
+int xfs_rmap_compare(const struct xfs_rmap_irec *a,
+ const struct xfs_rmap_irec *b);
+union xfs_btree_rec;
+int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
+ struct xfs_rmap_irec *irec);
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 74e5a54bc428..9d9c9192584c 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -199,7 +199,7 @@ xfs_rmapbt_init_high_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
- __uint64_t off;
+ uint64_t off;
int adj;
adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
@@ -241,7 +241,7 @@ xfs_rmapbt_init_ptr_from_cur(
ptr->s = agf->agf_roots[cur->bc_btnum];
}
-STATIC __int64_t
+STATIC int64_t
xfs_rmapbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
@@ -249,9 +249,9 @@ xfs_rmapbt_key_diff(
struct xfs_rmap_irec *rec = &cur->bc_rec.r;
struct xfs_rmap_key *kp = &key->rmap;
__u64 x, y;
- __int64_t d;
+ int64_t d;
- d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+ d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
if (d)
return d;
@@ -271,7 +271,7 @@ xfs_rmapbt_key_diff(
return 0;
}
-STATIC __int64_t
+STATIC int64_t
xfs_rmapbt_diff_two_keys(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
@@ -279,10 +279,10 @@ xfs_rmapbt_diff_two_keys(
{
struct xfs_rmap_key *kp1 = &k1->rmap;
struct xfs_rmap_key *kp2 = &k2->rmap;
- __int64_t d;
+ int64_t d;
__u64 x, y;
- d = (__int64_t)be32_to_cpu(kp1->rm_startblock) -
+ d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
be32_to_cpu(kp2->rm_startblock);
if (d)
return d;
@@ -377,17 +377,16 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
.verify_write = xfs_rmapbt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_rmapbt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- __uint32_t x;
- __uint32_t y;
- __uint64_t a;
- __uint64_t b;
+ uint32_t x;
+ uint32_t y;
+ uint64_t a;
+ uint64_t b;
x = be32_to_cpu(k1->rmap.rm_startblock);
y = be32_to_cpu(k2->rmap.rm_startblock);
@@ -414,10 +413,10 @@ xfs_rmapbt_recs_inorder(
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
- __uint32_t x;
- __uint32_t y;
- __uint64_t a;
- __uint64_t b;
+ uint32_t x;
+ uint32_t y;
+ uint64_t a;
+ uint64_t b;
x = be32_to_cpu(r1->rmap.rm_startblock);
y = be32_to_cpu(r2->rmap.rm_startblock);
@@ -437,7 +436,6 @@ xfs_rmapbt_recs_inorder(
return 1;
return 0;
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_rmapbt_ops = {
.rec_len = sizeof(struct xfs_rmap_rec),
@@ -456,10 +454,8 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
.key_diff = xfs_rmapbt_key_diff,
.buf_ops = &xfs_rmapbt_buf_ops,
.diff_two_keys = xfs_rmapbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_rmapbt_keys_inorder,
.recs_inorder = xfs_rmapbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index ea45584a9913..5d4e43ef4eea 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -70,7 +70,7 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
* Get a buffer for the bitmap or summary file block specified.
* The buffer is returned read and locked.
*/
-static int
+int
xfs_rtbuf_get(
xfs_mount_t *mp, /* file system mount structure */
xfs_trans_t *tp, /* transaction pointer */
@@ -1011,8 +1011,78 @@ xfs_rtfree_extent(
mp->m_sb.sb_rextents) {
if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
- *(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
+ *(uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
}
return 0;
}
+
+/* Find all the free records within a given range. */
+int
+xfs_rtalloc_query_range(
+ struct xfs_trans *tp,
+ struct xfs_rtalloc_rec *low_rec,
+ struct xfs_rtalloc_rec *high_rec,
+ xfs_rtalloc_query_range_fn fn,
+ void *priv)
+{
+ struct xfs_rtalloc_rec rec;
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_rtblock_t rtstart;
+ xfs_rtblock_t rtend;
+ xfs_rtblock_t rem;
+ int is_free;
+ int error = 0;
+
+ if (low_rec->ar_startblock > high_rec->ar_startblock)
+ return -EINVAL;
+ else if (low_rec->ar_startblock == high_rec->ar_startblock)
+ return 0;
+
+ /* Iterate the bitmap, looking for discrepancies. */
+ rtstart = low_rec->ar_startblock;
+ rem = high_rec->ar_startblock - rtstart;
+ while (rem) {
+ /* Is the first block free? */
+ error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
+ &is_free);
+ if (error)
+ break;
+
+ /* How long does the extent go for? */
+ error = xfs_rtfind_forw(mp, tp, rtstart,
+ high_rec->ar_startblock - 1, &rtend);
+ if (error)
+ break;
+
+ if (is_free) {
+ rec.ar_startblock = rtstart;
+ rec.ar_blockcount = rtend - rtstart + 1;
+
+ error = fn(tp, &rec, priv);
+ if (error)
+ break;
+ }
+
+ rem -= rtend - rtstart + 1;
+ rtstart = rtend + 1;
+ }
+
+ return error;
+}
+
+/* Find all the free records. */
+int
+xfs_rtalloc_query_all(
+ struct xfs_trans *tp,
+ xfs_rtalloc_query_range_fn fn,
+ void *priv)
+{
+ struct xfs_rtalloc_rec keys[2];
+
+ keys[0].ar_startblock = 0;
+ keys[1].ar_startblock = tp->t_mountp->m_sb.sb_rblocks;
+ keys[0].ar_blockcount = keys[1].ar_blockcount = 0;
+
+ return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv);
+}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 584ec896a533..9b5aae2bcc0b 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -448,7 +448,7 @@ xfs_sb_quota_to_disk(
struct xfs_dsb *to,
struct xfs_sb *from)
{
- __uint16_t qflags = from->sb_qflags;
+ uint16_t qflags = from->sb_qflags;
to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
if (xfs_sb_version_has_pquotino(from)) {
@@ -756,7 +756,7 @@ xfs_sb_mount_common(
mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
- mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
+ mp->m_ialloc_inos = (int)MAX((uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 2e2c6716b623..c484877129a0 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -114,7 +114,7 @@ xfs_symlink_verify(
if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
return false;
if (be32_to_cpu(dsl->sl_offset) +
- be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
+ be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
return false;
if (dsl->sl_owner == 0)
return false;
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index b456cca1bfb2..6bd916bd35e2 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -477,14 +477,14 @@ xfs_calc_mkdir_reservation(
/*
* Making a new symplink is the same as creating a new file, but
* with the added blocks for remote symlink data which can be up to 1kB in
- * length (MAXPATHLEN).
+ * length (XFS_SYMLINK_MAXLEN).
*/
STATIC uint
xfs_calc_symlink_reservation(
struct xfs_mount *mp)
{
return xfs_calc_create_reservation(mp) +
- xfs_calc_buf_res(1, MAXPATHLEN);
+ xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
}
/*
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 7917f6e44286..d787c677d2a3 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -21,8 +21,20 @@
/*
* Components of space reservations.
*/
+
+/* Worst case number of rmaps that can be held in a block. */
#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \
(((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
+
+/* Adding one rmap could split every level up to the top of the tree. */
+#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels)
+
+/* Blocks we might need to add "b" rmaps to a tree. */
+#define XFS_NRMAPADD_SPACE_RES(mp, b)\
+ (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
+ XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
+ XFS_RMAPADD_SPACE_RES(mp))
+
#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \
(((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
#define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1)
@@ -30,13 +42,12 @@
(((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
XFS_EXTENTADD_SPACE_RES(mp,w))
+
+/* Blocks we might need to add "b" mappings & rmappings to a file. */
#define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\
- (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
- XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
- XFS_EXTENTADD_SPACE_RES(mp,w) + \
- ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
- XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
- (mp)->m_rmap_maxlevels)
+ (XFS_NEXTENTADD_SPACE_RES((mp), (b), (w)) + \
+ XFS_NRMAPADD_SPACE_RES((mp), (b)))
+
#define XFS_DAENTER_1B(mp,w) \
((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
#define XFS_DAENTER_DBS(mp,w) \
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 717909f2f7b7..0220159bd463 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -18,34 +18,34 @@
#ifndef __XFS_TYPES_H__
#define __XFS_TYPES_H__
-typedef __uint32_t prid_t; /* project ID */
+typedef uint32_t prid_t; /* project ID */
-typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */
-typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */
-typedef __uint32_t xfs_extlen_t; /* extent length in blocks */
-typedef __uint32_t xfs_agnumber_t; /* allocation group number */
-typedef __int32_t xfs_extnum_t; /* # of extents in a file */
-typedef __int16_t xfs_aextnum_t; /* # extents in an attribute fork */
-typedef __int64_t xfs_fsize_t; /* bytes in a file */
-typedef __uint64_t xfs_ufsize_t; /* unsigned bytes in a file */
+typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */
+typedef uint32_t xfs_agino_t; /* inode # within allocation grp */
+typedef uint32_t xfs_extlen_t; /* extent length in blocks */
+typedef uint32_t xfs_agnumber_t; /* allocation group number */
+typedef int32_t xfs_extnum_t; /* # of extents in a file */
+typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */
+typedef int64_t xfs_fsize_t; /* bytes in a file */
+typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */
-typedef __int32_t xfs_suminfo_t; /* type of bitmap summary info */
-typedef __int32_t xfs_rtword_t; /* word type for bitmap manipulations */
+typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
+typedef int32_t xfs_rtword_t; /* word type for bitmap manipulations */
-typedef __int64_t xfs_lsn_t; /* log sequence number */
-typedef __int32_t xfs_tid_t; /* transaction identifier */
+typedef int64_t xfs_lsn_t; /* log sequence number */
+typedef int32_t xfs_tid_t; /* transaction identifier */
-typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
-typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
+typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
+typedef uint32_t xfs_dahash_t; /* dir/attr hash value */
-typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
-typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
-typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
-typedef __uint64_t xfs_fileoff_t; /* block number in a file */
-typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
+typedef uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
+typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
+typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
+typedef uint64_t xfs_fileoff_t; /* block number in a file */
+typedef uint64_t xfs_filblks_t; /* number of blocks in a file */
-typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
-typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
+typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
+typedef int64_t xfs_sfiloff_t; /* signed block number in a file */
/*
* Null values for the types.
@@ -125,7 +125,7 @@ struct xfs_name {
* uid_t and gid_t are hard-coded to 32 bits in the inode.
* Hence, an 'id' in a dquot is 32 bits..
*/
-typedef __uint32_t xfs_dqid_t;
+typedef uint32_t xfs_dqid_t;
/*
* Constants for bit manipulations.
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
deleted file mode 100644
index b83f76b6d410..000000000000
--- a/fs/xfs/uuid.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
- __be32 uu_timelow;
- __be16 uu_timemid;
- __be16 uu_timehi;
- __be16 uu_clockseq;
- __be16 uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
- xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
- fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
- be16_to_cpu(uup->uu_timemid);
- fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
- int i;
- char *cp = (char *)uuid;
-
- if (uuid == NULL)
- return 0;
- /* implied check of version number here... */
- for (i = 0; i < sizeof *uuid; i++)
- if (*cp++) return 0; /* not nil */
- return 1; /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
- return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
deleted file mode 100644
index 104db0f3bed6..000000000000
--- a/fs/xfs/uuid.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
- unsigned char __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-static inline void
-uuid_copy(uuid_t *dst, uuid_t *src)
-{
- memcpy(dst, src, sizeof(uuid_t));
-}
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index a742c47f7d5a..80cd0fd86783 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -24,6 +24,10 @@
#define XFS_BUF_LOCK_TRACKING 1
#endif
+#ifdef CONFIG_XFS_ASSERT_FATAL
+#define XFS_ASSERT_FATAL 1
+#endif
+
#ifdef CONFIG_XFS_WARN
#define XFS_WARN 1
#endif
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b468e041f207..7034e17535de 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -170,8 +170,8 @@ xfs_get_acl(struct inode *inode, int type)
return acl;
}
-STATIC int
-__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+int
+__xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
struct xfs_inode *ip = XFS_I(inode);
unsigned char *ea_name;
@@ -268,5 +268,5 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
}
set_acl:
- return __xfs_set_acl(inode, type, acl);
+ return __xfs_set_acl(inode, acl, type);
}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 286fa89217f5..04327318ef67 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -24,6 +24,7 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#else
static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
{
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 631e7c0e0a29..6bf120bb1a17 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -103,19 +103,19 @@ xfs_finish_page_writeback(
unsigned int bsize;
ASSERT(bvec->bv_offset < PAGE_SIZE);
- ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
+ ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
ASSERT(end < PAGE_SIZE);
- ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
+ ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
bh = head = page_buffers(bvec->bv_page);
bsize = bh->b_size;
do {
+ if (off > end)
+ break;
next = bh->b_this_page;
if (off < bvec->bv_offset)
goto next_bh;
- if (off > end)
- break;
bh->b_end_io(bh, !error);
next_bh:
off += bsize;
@@ -189,7 +189,7 @@ xfs_setfilesize_trans_alloc(
* We hand off the transaction to the completion thread now, so
* clear the flag here.
*/
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
return 0;
}
@@ -252,7 +252,7 @@ xfs_setfilesize_ioend(
* thus we need to mark ourselves as being in a transaction manually.
* Similarly for freeze protection.
*/
- current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
/* we abort the update if there was an IO error */
@@ -274,54 +274,50 @@ xfs_end_io(
struct xfs_ioend *ioend =
container_of(work, struct xfs_ioend, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
- int error = ioend->io_bio->bi_error;
+ xfs_off_t offset = ioend->io_offset;
+ size_t size = ioend->io_size;
+ int error;
/*
- * Set an error if the mount has shut down and proceed with end I/O
- * processing so it can perform whatever cleanups are necessary.
+ * Just clean up the in-memory strutures if the fs has been shut down.
*/
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = -EIO;
+ goto done;
+ }
/*
- * For a CoW extent, we need to move the mapping from the CoW fork
- * to the data fork. If instead an error happened, just dump the
- * new blocks.
+ * Clean up any COW blocks on an I/O error.
*/
- if (ioend->io_type == XFS_IO_COW) {
- if (error)
- goto done;
- if (ioend->io_bio->bi_error) {
- error = xfs_reflink_cancel_cow_range(ip,
- ioend->io_offset, ioend->io_size);
- goto done;
+ error = blk_status_to_errno(ioend->io_bio->bi_status);
+ if (unlikely(error)) {
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ xfs_reflink_cancel_cow_range(ip, offset, size, true);
+ break;
}
- error = xfs_reflink_end_cow(ip, ioend->io_offset,
- ioend->io_size);
- if (error)
- goto done;
+
+ goto done;
}
/*
- * For unwritten extents we need to issue transactions to convert a
- * range to normal written extens after the data I/O has finished.
- * Detecting and handling completion IO errors is done individually
- * for each case as different cleanup operations need to be performed
- * on error.
+ * Success: commit the COW or unwritten blocks if needed.
*/
- if (ioend->io_type == XFS_IO_UNWRITTEN) {
- if (error)
- goto done;
- error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
- ioend->io_size);
- } else if (ioend->io_append_trans) {
- error = xfs_setfilesize_ioend(ioend, error);
- } else {
- ASSERT(!xfs_ioend_is_append(ioend) ||
- ioend->io_type == XFS_IO_COW);
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ error = xfs_reflink_end_cow(ip, offset, size);
+ break;
+ case XFS_IO_UNWRITTEN:
+ error = xfs_iomap_write_unwritten(ip, offset, size);
+ break;
+ default:
+ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+ break;
}
done:
+ if (ioend->io_append_trans)
+ error = xfs_setfilesize_ioend(ioend, error);
xfs_destroy_ioend(ioend, error);
}
@@ -337,7 +333,7 @@ xfs_end_bio(
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
- xfs_destroy_ioend(ioend, bio->bi_error);
+ xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}
STATIC int
@@ -349,7 +345,7 @@ xfs_map_blocks(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- ssize_t count = 1 << inode->i_blkbits;
+ ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb, end_fsb;
int error = 0;
int bmapi_flags = XFS_BMAPI_ENTIRE;
@@ -481,6 +477,12 @@ xfs_submit_ioend(
struct xfs_ioend *ioend,
int status)
{
+ /* Convert CoW extents to regular */
+ if (!status && ioend->io_type == XFS_IO_COW) {
+ status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
+ ioend->io_offset, ioend->io_size);
+ }
+
/* Reserve log space if we might write beyond the on-disk inode size. */
if (!status &&
ioend->io_type != XFS_IO_UNWRITTEN &&
@@ -499,11 +501,12 @@ xfs_submit_ioend(
* time.
*/
if (status) {
- ioend->io_bio->bi_error = status;
+ ioend->io_bio->bi_status = errno_to_blk_status(status);
bio_endio(ioend->io_bio);
return status;
}
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
@@ -563,6 +566,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
@@ -752,7 +756,7 @@ xfs_aops_discard_page(
break;
}
next_buffer:
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while ((bh = bh->b_this_page) != head);
@@ -835,12 +839,12 @@ xfs_writepage_map(
struct inode *inode,
struct page *page,
loff_t offset,
- __uint64_t end_offset)
+ uint64_t end_offset)
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
- ssize_t len = 1 << inode->i_blkbits;
+ ssize_t len = i_blocksize(inode);
int error = 0;
int count = 0;
int uptodate = 1;
@@ -990,7 +994,7 @@ xfs_do_writepage(
struct xfs_writepage_ctx *wpc = data;
struct inode *inode = page->mapping->host;
loff_t offset;
- __uint64_t end_offset;
+ uint64_t end_offset;
pgoff_t end_index;
trace_xfs_writepage(inode, page, 0, 0);
@@ -1015,7 +1019,7 @@ xfs_do_writepage(
* Given that we do not allow direct reclaim to call us, we should
* never be called while in a filesystem transaction.
*/
- if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
goto redirty;
/*
@@ -1204,7 +1208,7 @@ xfs_map_trim_size(
offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
- 1 << inode->i_blkbits);
+ i_blocksize(inode));
}
if (mapping_size > LONG_MAX)
mapping_size = LONG_MAX;
@@ -1235,7 +1239,7 @@ xfs_get_blocks(
return -EIO;
offset = (xfs_off_t)iblock << inode->i_blkbits;
- ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+ ASSERT(bh_result->b_size >= i_blocksize(inode));
size = bh_result->b_size;
if (offset >= i_size_read(inode))
@@ -1260,8 +1264,8 @@ xfs_get_blocks(
if (nimaps) {
trace_xfs_get_blocks_found(ip, offset, size,
- ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
- : XFS_IO_OVERWRITE, &imap);
+ imap.br_state == XFS_EXT_UNWRITTEN ?
+ XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
xfs_iunlock(ip, lockmode);
} else {
trace_xfs_get_blocks_notfound(ip, offset, size);
@@ -1275,9 +1279,7 @@ xfs_get_blocks(
* For unwritten extents do not report a disk address in the buffered
* read case (treat as if we're reading into a hole).
*/
- if (imap.br_startblock != HOLESTARTBLOCK &&
- imap.br_startblock != DELAYSTARTBLOCK &&
- !ISUNWRITTEN(&imap))
+ if (xfs_bmap_is_real_extent(&imap))
xfs_map_buffer(inode, bh_result, &imap, offset);
/*
@@ -1317,9 +1319,12 @@ xfs_vm_bmap(
* The swap code (ab-)uses ->bmap to get a block mapping and then
* bypasseѕ the file system for actual I/O. We really can't allow
* that on reflinks inodes, so we have to skip out here. And yes,
- * 0 is the magic code for a bmap error..
+ * 0 is the magic code for a bmap error.
+ *
+ * Since we don't pass back blockdev info, we can't return bmap
+ * information for rt files either.
*/
- if (xfs_is_reflink_inode(ip))
+ if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
return 0;
filemap_write_and_wait(mapping);
@@ -1383,7 +1388,7 @@ xfs_vm_set_page_dirty(
if (offset < end_offset)
set_buffer_dirty(bh);
bh = bh->b_this_page;
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while (bh != head);
}
/*
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index d14691aa02b4..5d5a5e277f35 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -117,6 +117,7 @@ typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int);
typedef struct xfs_attr_list_context {
+ struct xfs_trans *tp;
struct xfs_inode *dp; /* inode */
struct attrlist_cursor_kern *cursor; /* position in list */
char *alist; /* output buffer */
@@ -140,8 +141,10 @@ typedef struct xfs_attr_list_context {
* Overall external interface routines.
*/
int xfs_attr_inactive(struct xfs_inode *dp);
+int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *);
int xfs_attr_list_int(struct xfs_attr_list_context *);
int xfs_inode_hasattr(struct xfs_inode *ip);
+int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
unsigned char *value, int *valuelenp, int flags);
int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 97c45b6eb91e..7740c8a5e736 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -230,7 +230,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
bp = NULL;
if (cursor->blkno > 0) {
- error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
+ error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
if ((error != 0) && (error != -EFSCORRUPTED))
return error;
@@ -242,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
case XFS_DA_NODE_MAGIC:
case XFS_DA3_NODE_MAGIC:
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
break;
case XFS_ATTR_LEAF_MAGIC:
@@ -254,18 +254,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (cursor->hashval > be32_to_cpu(
entries[leafhdr.count - 1].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
} else if (cursor->hashval <= be32_to_cpu(
entries[0].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
}
break;
default:
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
}
}
@@ -279,9 +279,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (bp == NULL) {
cursor->blkno = 0;
for (;;) {
- __uint16_t magic;
+ uint16_t magic;
- error = xfs_da3_node_read(NULL, dp,
+ error = xfs_da3_node_read(context->tp, dp,
cursor->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error)
@@ -297,7 +297,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount,
node);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return -EFSCORRUPTED;
}
@@ -313,10 +313,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
}
}
if (i == nodehdr.count) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
}
}
ASSERT(bp != NULL);
@@ -333,12 +333,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (context->seen_enough || leafhdr.forw == 0)
break;
cursor->blkno = leafhdr.forw;
- xfs_trans_brelse(NULL, bp);
- error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp);
+ xfs_trans_brelse(context->tp, bp);
+ error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp);
if (error)
return error;
}
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
@@ -448,16 +448,36 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
trace_xfs_attr_leaf_list(context);
context->cursor->blkno = 0;
- error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
+ error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp);
if (error)
return error;
xfs_attr3_leaf_list_int(bp, context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
int
+xfs_attr_list_int_ilocked(
+ struct xfs_attr_list_context *context)
+{
+ struct xfs_inode *dp = context->dp;
+
+ ASSERT(xfs_isilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+
+ /*
+ * Decide on what work routines to call based on the inode size.
+ */
+ if (!xfs_inode_hasattr(dp))
+ return 0;
+ else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ return xfs_attr_shortform_list(context);
+ else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+ return xfs_attr_leaf_list(context);
+ return xfs_attr_node_list(context);
+}
+
+int
xfs_attr_list_int(
xfs_attr_list_context_t *context)
{
@@ -470,19 +490,8 @@ xfs_attr_list_int(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
- /*
- * Decide on what work routines to call based on the inode size.
- */
lock_mode = xfs_ilock_attr_map_shared(dp);
- if (!xfs_inode_hasattr(dp)) {
- error = 0;
- } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- error = xfs_attr_shortform_list(context);
- } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
- error = xfs_attr_leaf_list(context);
- } else {
- error = xfs_attr_node_list(context);
- }
+ error = xfs_attr_list_int_ilocked(context);
xfs_iunlock(dp, lock_mode);
return error;
}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 9bf57c76623b..88073910fa5d 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -34,6 +34,8 @@
#include "xfs_bmap.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
kmem_zone_t *xfs_bui_zone;
@@ -215,6 +217,7 @@ void
xfs_bui_release(
struct xfs_bui_log_item *buip)
{
+ ASSERT(atomic_read(&buip->bui_refcount) > 0);
if (atomic_dec_and_test(&buip->bui_refcount)) {
xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_bui_item_free(buip);
@@ -393,6 +396,7 @@ xfs_bui_recover(
struct xfs_map_extent *bmap;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t inode_fsb;
+ xfs_filblks_t count;
bool op_ok;
struct xfs_bud_log_item *budp;
enum xfs_bmap_intent_type type;
@@ -401,6 +405,7 @@ xfs_bui_recover(
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
struct xfs_defer_ops dfops;
+ struct xfs_bmbt_irec irec;
xfs_fsblock_t firstfsb;
ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
@@ -446,7 +451,8 @@ xfs_bui_recover(
return -EIO;
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
if (error)
return error;
budp = xfs_trans_get_bud(tp, buip);
@@ -477,13 +483,24 @@ xfs_bui_recover(
}
xfs_trans_ijoin(tp, ip, 0);
+ count = bmap->me_len;
error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
ip, whichfork, bmap->me_startoff,
- bmap->me_startblock, bmap->me_len,
- state);
+ bmap->me_startblock, &count, state);
if (error)
goto err_dfops;
+ if (count > 0) {
+ ASSERT(type == XFS_BMAP_UNMAP);
+ irec.br_startblock = bmap->me_startblock;
+ irec.br_blockcount = count;
+ irec.br_startoff = bmap->me_startoff;
+ irec.br_state = state;
+ error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
+ if (error)
+ goto err_dfops;
+ }
+
/* Finish transaction, free inodes. */
error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c1417919ab0a..93e955262d07 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -81,14 +81,13 @@ xfs_zero_extent(
return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
block << (mp->m_super->s_blocksize_bits - 9),
count_fsb << (mp->m_super->s_blocksize_bits - 9),
- GFP_NOFS, true);
+ GFP_NOFS, 0);
}
int
xfs_bmap_rtalloc(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
{
- xfs_alloctype_t atype = 0; /* type for allocation routines */
int error; /* error return value */
xfs_mount_t *mp; /* mount point structure */
xfs_extlen_t prod = 0; /* product factor for allocators */
@@ -155,18 +154,14 @@ xfs_bmap_rtalloc(
/*
* Realtime allocation, done through xfs_rtallocate_extent.
*/
- atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
do_div(ap->blkno, mp->m_sb.sb_rextsize);
rtb = ap->blkno;
ap->length = ralen;
- if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
- &ralen, atype, ap->wasdel, prod, &rtb)))
- return error;
- if (rtb == NULLFSBLOCK && prod > 1 &&
- (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
- ap->length, &ralen, atype,
- ap->wasdel, 1, &rtb)))
+ error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
+ &ralen, ap->wasdel, prod, &rtb);
+ if (error)
return error;
+
ap->blkno = rtb;
if (ap->blkno != NULLFSBLOCK) {
ap->blkno *= mp->m_sb.sb_rextsize;
@@ -224,20 +219,24 @@ xfs_bmap_eof(
*/
/*
- * Count leaf blocks given a range of extent records.
+ * Count leaf blocks given a range of extent records. Delayed allocation
+ * extents are not counted towards the totals.
*/
STATIC void
xfs_bmap_count_leaves(
- xfs_ifork_t *ifp,
- xfs_extnum_t idx,
- int numrecs,
- int *count)
+ struct xfs_ifork *ifp,
+ xfs_extnum_t *numrecs,
+ xfs_filblks_t *count)
{
- int b;
-
- for (b = 0; b < numrecs; b++) {
- xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
- *count += xfs_bmbt_get_blockcount(frp);
+ xfs_extnum_t i;
+ xfs_extnum_t nr_exts = xfs_iext_count(ifp);
+
+ for (i = 0; i < nr_exts; i++) {
+ xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i);
+ if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) {
+ (*numrecs)++;
+ *count += xfs_bmbt_get_blockcount(frp);
+ }
}
}
@@ -250,7 +249,7 @@ xfs_bmap_disk_count_leaves(
struct xfs_mount *mp,
struct xfs_btree_block *block,
int numrecs,
- int *count)
+ xfs_filblks_t *count)
{
int b;
xfs_bmbt_rec_t *frp;
@@ -265,17 +264,18 @@ xfs_bmap_disk_count_leaves(
* Recursively walks each level of a btree
* to count total fsblocks in use.
*/
-STATIC int /* error */
+STATIC int
xfs_bmap_count_tree(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_ifork_t *ifp, /* inode fork pointer */
- xfs_fsblock_t blockno, /* file system block number */
- int levelin, /* level in btree */
- int *count) /* Count of blocks */
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_ifork *ifp,
+ xfs_fsblock_t blockno,
+ int levelin,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
{
int error;
- xfs_buf_t *bp, *nbp;
+ struct xfs_buf *bp, *nbp;
int level = levelin;
__be64 *pp;
xfs_fsblock_t bno = blockno;
@@ -308,8 +308,9 @@ xfs_bmap_count_tree(
/* Dive to the next level */
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
- if (unlikely((error =
- xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
+ error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
+ count);
+ if (error) {
xfs_trans_brelse(tp, bp);
XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
XFS_ERRLEVEL_LOW, mp);
@@ -321,6 +322,7 @@ xfs_bmap_count_tree(
for (;;) {
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
numrecs = be16_to_cpu(block->bb_numrecs);
+ (*nextents) += numrecs;
xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
xfs_trans_brelse(tp, bp);
if (nextbno == NULLFSBLOCK)
@@ -339,46 +341,64 @@ xfs_bmap_count_tree(
}
/*
- * Count fsblocks of the given fork.
+ * Count fsblocks of the given fork. Delayed allocation extents are
+ * not counted towards the totals.
*/
-static int /* error */
+int
xfs_bmap_count_blocks(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_inode_t *ip, /* incore inode */
- int whichfork, /* data or attr fork */
- int *count) /* out: count of blocks */
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
{
+ struct xfs_mount *mp; /* file system mount structure */
+ __be64 *pp; /* pointer to block address */
struct xfs_btree_block *block; /* current btree block */
+ struct xfs_ifork *ifp; /* fork structure */
xfs_fsblock_t bno; /* block # of "block" */
- xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
- xfs_mount_t *mp; /* file system mount structure */
- __be64 *pp; /* pointer to block address */
+ int error;
bno = NULLFSBLOCK;
mp = ip->i_mount;
+ *nextents = 0;
+ *count = 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
- if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
- xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
+ if (!ifp)
return 0;
- }
- /*
- * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
- */
- block = ifp->if_broot;
- level = be16_to_cpu(block->bb_level);
- ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
- bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-
- if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
- XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
- mp);
- return -EFSCORRUPTED;
+ switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ case XFS_DINODE_FMT_EXTENTS:
+ xfs_bmap_count_leaves(ifp, nextents, count);
+ return 0;
+ case XFS_DINODE_FMT_BTREE:
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+ */
+ block = ifp->if_broot;
+ level = be16_to_cpu(block->bb_level);
+ ASSERT(level > 0);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+ bno = be64_to_cpu(*pp);
+ ASSERT(bno != NULLFSBLOCK);
+ ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+ ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
+ error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
+ nextents, count);
+ if (error) {
+ XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
+ XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
return 0;
@@ -394,11 +414,11 @@ xfs_getbmapx_fix_eof_hole(
struct getbmapx *out, /* output structure */
int prealloced, /* this is a file with
* preallocated data space */
- __int64_t end, /* last block requested */
+ int64_t end, /* last block requested */
xfs_fsblock_t startblock,
bool moretocome)
{
- __int64_t fixlen;
+ int64_t fixlen;
xfs_mount_t *mp; /* file system mount point */
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_extnum_t lastx; /* last extent pointer */
@@ -453,16 +473,15 @@ xfs_getbmap_adjust_shared(
next_map->br_blockcount = 0;
/* Only written data blocks can be shared. */
- if (!xfs_is_reflink_inode(ip) || whichfork != XFS_DATA_FORK ||
- map->br_startblock == DELAYSTARTBLOCK ||
- map->br_startblock == HOLESTARTBLOCK ||
- ISUNWRITTEN(map))
+ if (!xfs_is_reflink_inode(ip) ||
+ whichfork != XFS_DATA_FORK ||
+ !xfs_bmap_is_real_extent(map))
return 0;
agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
- error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount,
- &ebno, &elen, true);
+ error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+ map->br_blockcount, &ebno, &elen, true);
if (error)
return error;
@@ -520,9 +539,9 @@ xfs_getbmap(
xfs_bmap_format_t formatter, /* format to user */
void *arg) /* formatter arg */
{
- __int64_t bmvend; /* last block requested */
+ int64_t bmvend; /* last block requested */
int error = 0; /* return value */
- __int64_t fixlen; /* length for -1 case */
+ int64_t fixlen; /* length for -1 case */
int i; /* extent number */
int lock; /* lock state */
xfs_bmbt_irec_t *map; /* buffer for user's data */
@@ -588,9 +607,13 @@ xfs_getbmap(
}
break;
default:
+ /* Local format data forks report no extents. */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ bmv->bmv_entries = 0;
+ return 0;
+ }
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
- ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+ ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
return -EINVAL;
if (xfs_get_extsz_hint(ip) ||
@@ -607,7 +630,7 @@ xfs_getbmap(
if (bmv->bmv_length == -1) {
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
bmv->bmv_length =
- max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
+ max_t(int64_t, fixlen - bmv->bmv_offset, 0);
} else if (bmv->bmv_length == 0) {
bmv->bmv_entries = 0;
return 0;
@@ -718,7 +741,7 @@ xfs_getbmap(
* extents.
*/
if (map[i].br_startblock == DELAYSTARTBLOCK &&
- map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+ map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
ASSERT((iflags & BMV_IF_DELALLOC) != 0);
if (map[i].br_startblock == HOLESTARTBLOCK &&
@@ -744,7 +767,7 @@ xfs_getbmap(
out[cur_ext].bmv_offset +
out[cur_ext].bmv_length;
bmv->bmv_length =
- max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+ max_t(int64_t, 0, bmvend - bmv->bmv_offset);
/*
* In case we don't want to return the hole,
@@ -787,11 +810,9 @@ xfs_getbmap(
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
for (i = 0; i < cur_ext; i++) {
- int full = 0; /* user array is full */
-
/* format results & advance arg */
- error = formatter(&arg, &out[i], &full);
- if (error || full)
+ error = formatter(&arg, &out[i]);
+ if (error)
break;
}
@@ -911,23 +932,22 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
}
/*
- * This is called by xfs_inactive to free any blocks beyond eof
- * when the link count isn't zero and by xfs_dm_punch_hole() when
- * punching a hole to EOF.
+ * This is called to free any blocks beyond eof. The caller must hold
+ * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
+ * reference to the inode.
*/
int
xfs_free_eofblocks(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- bool need_iolock)
+ struct xfs_inode *ip)
{
- xfs_trans_t *tp;
- int error;
- xfs_fileoff_t end_fsb;
- xfs_fileoff_t last_fsb;
- xfs_filblks_t map_len;
- int nimaps;
- xfs_bmbt_irec_t imap;
+ struct xfs_trans *tp;
+ int error;
+ xfs_fileoff_t end_fsb;
+ xfs_fileoff_t last_fsb;
+ xfs_filblks_t map_len;
+ int nimaps;
+ struct xfs_bmbt_irec imap;
+ struct xfs_mount *mp = ip->i_mount;
/*
* Figure out if there are any blocks beyond the end
@@ -944,6 +964,10 @@ xfs_free_eofblocks(
error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ /*
+ * If there are blocks after the end of file, truncate the file to its
+ * current size to free them up.
+ */
if (!error && (nimaps != 0) &&
(imap.br_startblock != HOLESTARTBLOCK ||
ip->i_delayed_blks)) {
@@ -954,22 +978,13 @@ xfs_free_eofblocks(
if (error)
return error;
- /*
- * There are blocks after the end of file.
- * Free them up now by truncating the file to
- * its current size.
- */
- if (need_iolock) {
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
- return -EAGAIN;
- }
+ /* wait on dio to ensure i_size has settled */
+ inode_dio_wait(VFS_I(ip));
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
&tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}
@@ -997,8 +1012,6 @@ xfs_free_eofblocks(
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}
return error;
}
@@ -1222,11 +1235,8 @@ xfs_adjust_extent_unmap_boundaries(
return error;
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
- xfs_daddr_t block;
-
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
- block = imap.br_startblock;
- mod = do_div(block, mp->m_sb.sb_rextsize);
+ mod = do_mod(imap.br_startblock, mp->m_sb.sb_rextsize);
if (mod)
*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
}
@@ -1324,8 +1334,16 @@ xfs_free_file_space(
/*
* Now that we've unmap all full blocks we'll have to zero out any
* partial block at the beginning and/or end. xfs_zero_range is
- * smart enough to skip any holes, including those we just created.
+ * smart enough to skip any holes, including those we just created,
+ * but we must take care not to zero beyond EOF and enlarge i_size.
*/
+
+ if (offset >= XFS_ISIZE(ip))
+ return 0;
+
+ if (offset + len > XFS_ISIZE(ip))
+ len = XFS_ISIZE(ip) - offset;
+
return xfs_zero_range(ip, offset, len, NULL);
}
@@ -1393,10 +1411,16 @@ xfs_shift_file_space(
xfs_fileoff_t stop_fsb;
xfs_fileoff_t next_fsb;
xfs_fileoff_t shift_fsb;
+ uint resblks;
ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
if (direction == SHIFT_LEFT) {
+ /*
+ * Reserve blocks to cover potential extent merges after left
+ * shift operations.
+ */
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
next_fsb = XFS_B_TO_FSB(mp, offset + len);
stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
} else {
@@ -1404,6 +1428,7 @@ xfs_shift_file_space(
* If right shift, delegate the work of initialization of
* next_fsb to xfs_bmap_shift_extent as it has ilock held.
*/
+ resblks = 0;
next_fsb = NULLFSBLOCK;
stop_fsb = XFS_B_TO_FSB(mp, offset);
}
@@ -1415,7 +1440,7 @@ xfs_shift_file_space(
* into the accessible region of the file.
*/
if (xfs_can_free_eofblocks(ip, true)) {
- error = xfs_free_eofblocks(mp, ip, false);
+ error = xfs_free_eofblocks(ip);
if (error)
return error;
}
@@ -1445,21 +1470,14 @@ xfs_shift_file_space(
}
while (!error && !done) {
- /*
- * We would need to reserve permanent block for transaction.
- * This will come into picture when after shifting extent into
- * hole we found that adjacent extents can be merged which
- * may lead to freeing of a block during record update.
- */
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+ &tp);
if (error)
break;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
- ip->i_gdquot, ip->i_pdquot,
- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+ ip->i_gdquot, ip->i_pdquot, resblks, 0,
XFS_QMOPT_RES_REGBLKS);
if (error)
goto out_trans_cancel;
@@ -1624,7 +1642,7 @@ xfs_swap_extents_check_format(
* extent format...
*/
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
- if (XFS_IFORK_BOFF(ip) &&
+ if (XFS_IFORK_Q(ip) &&
XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
return -EINVAL;
if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
@@ -1634,7 +1652,7 @@ xfs_swap_extents_check_format(
/* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
- if (XFS_IFORK_BOFF(tip) &&
+ if (XFS_IFORK_Q(tip) &&
XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
return -EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
@@ -1683,7 +1701,7 @@ xfs_swap_extent_rmap(
xfs_filblks_t ilen;
xfs_filblks_t rlen;
int nimaps;
- __uint64_t tip_flags2;
+ uint64_t tip_flags2;
/*
* If the source file has shared blocks, we must flag the donor
@@ -1796,10 +1814,11 @@ xfs_swap_extent_forks(
int *target_log_flags)
{
struct xfs_ifork tempifp, *ifp, *tifp;
- int aforkblks = 0;
- int taforkblks = 0;
+ xfs_filblks_t aforkblks = 0;
+ xfs_filblks_t taforkblks = 0;
+ xfs_extnum_t junk;
xfs_extnum_t nextents;
- __uint64_t tmp;
+ uint64_t tmp;
int error;
/*
@@ -1807,14 +1826,14 @@ xfs_swap_extent_forks(
*/
if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
- error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
+ error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
&aforkblks);
if (error)
return error;
}
if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
(tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
- error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
+ error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
&taforkblks);
if (error)
return error;
@@ -1857,15 +1876,15 @@ xfs_swap_extent_forks(
/*
* Fix the on-disk inode values
*/
- tmp = (__uint64_t)ip->i_d.di_nblocks;
+ tmp = (uint64_t)ip->i_d.di_nblocks;
ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
- tmp = (__uint64_t) ip->i_d.di_nextents;
+ tmp = (uint64_t) ip->i_d.di_nextents;
ip->i_d.di_nextents = tip->i_d.di_nextents;
tip->i_d.di_nextents = tmp;
- tmp = (__uint64_t) ip->i_d.di_format;
+ tmp = (uint64_t) ip->i_d.di_format;
ip->i_d.di_format = tip->i_d.di_format;
tip->i_d.di_format = tmp;
@@ -1934,7 +1953,7 @@ xfs_swap_extents(
int error = 0;
int lock_flags;
struct xfs_ifork *cowfp;
- __uint64_t f;
+ uint64_t f;
int resblks;
/*
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 68a621a8e0c0..0cede1043571 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -35,7 +35,7 @@ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
xfs_fileoff_t start_fsb, xfs_fileoff_t length);
/* bmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
+typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *);
int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
xfs_bmap_format_t formatter, void *arg);
@@ -63,12 +63,15 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
-int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
- bool need_iolock);
+int xfs_free_eofblocks(struct xfs_inode *ip);
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
struct xfs_swapext *sx);
xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
+int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+ int whichfork, xfs_extnum_t *nextents,
+ xfs_filblks_t *count);
+
#endif /* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ac3b4db519df..72f038492ba8 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -33,6 +33,7 @@
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
+#include <linux/sched/mm.h>
#include "xfs_format.h"
#include "xfs_log_format.h"
@@ -96,12 +97,16 @@ static inline void
xfs_buf_ioacct_inc(
struct xfs_buf *bp)
{
- if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
+ if (bp->b_flags & XBF_NO_IOACCT)
return;
ASSERT(bp->b_flags & XBF_ASYNC);
- bp->b_flags |= _XBF_IN_FLIGHT;
- percpu_counter_inc(&bp->b_target->bt_io_count);
+ spin_lock(&bp->b_lock);
+ if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
+ bp->b_state |= XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_inc(&bp->b_target->bt_io_count);
+ }
+ spin_unlock(&bp->b_lock);
}
/*
@@ -109,14 +114,24 @@ xfs_buf_ioacct_inc(
* freed and unaccount from the buftarg.
*/
static inline void
-xfs_buf_ioacct_dec(
+__xfs_buf_ioacct_dec(
struct xfs_buf *bp)
{
- if (!(bp->b_flags & _XBF_IN_FLIGHT))
- return;
+ lockdep_assert_held(&bp->b_lock);
+
+ if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
+ bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_dec(&bp->b_target->bt_io_count);
+ }
+}
- bp->b_flags &= ~_XBF_IN_FLIGHT;
- percpu_counter_dec(&bp->b_target->bt_io_count);
+static inline void
+xfs_buf_ioacct_dec(
+ struct xfs_buf *bp)
+{
+ spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+ spin_unlock(&bp->b_lock);
}
/*
@@ -148,9 +163,9 @@ xfs_buf_stale(
* unaccounted (released to LRU) before that occurs. Drop in-flight
* status now to preserve accounting consistency.
*/
- xfs_buf_ioacct_dec(bp);
-
spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+
atomic_set(&bp->b_lru_ref, 0);
if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
(list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -442,17 +457,17 @@ _xfs_buf_map_pages(
bp->b_addr = NULL;
} else {
int retried = 0;
- unsigned noio_flag;
+ unsigned nofs_flag;
/*
* vm_map_ram() will allocate auxillary structures (e.g.
* pagetables) with GFP_KERNEL, yet we are likely to be under
* GFP_NOFS context here. Hence we need to tell memory reclaim
- * that we are in such a context via PF_MEMALLOC_NOIO to prevent
+ * that we are in such a context via PF_MEMALLOC_NOFS to prevent
* memory reclaim re-entering the filesystem here and
* potentially deadlocking.
*/
- noio_flag = memalloc_noio_save();
+ nofs_flag = memalloc_nofs_save();
do {
bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-1, PAGE_KERNEL);
@@ -460,7 +475,7 @@ _xfs_buf_map_pages(
break;
vm_unmap_aliases();
} while (retried++ <= 1);
- memalloc_noio_restore(noio_flag);
+ memalloc_nofs_restore(nofs_flag);
if (!bp->b_addr)
return -ENOMEM;
@@ -758,7 +773,7 @@ xfs_buf_readahead_map(
int nmaps,
const struct xfs_buf_ops *ops)
{
- if (bdi_read_congested(target->bt_bdi))
+ if (bdi_read_congested(target->bt_bdev->bd_bdi))
return;
xfs_buf_read_map(target, map, nmaps,
@@ -978,12 +993,12 @@ xfs_buf_rele(
* ensures the decrement occurs only once per-buf.
*/
if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
goto out_unlock;
}
/* the last reference has been dropped ... */
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
/*
* If the buffer is added to the LRU take a new reference to the
@@ -1078,6 +1093,8 @@ void
xfs_buf_unlock(
struct xfs_buf *bp)
{
+ ASSERT(xfs_buf_islocked(bp));
+
XB_CLEAR_OWNER(bp);
up(&bp->b_sema);
@@ -1177,7 +1194,7 @@ xfs_buf_ioerror_alert(
{
xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
- (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
+ (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
}
int
@@ -1210,8 +1227,11 @@ xfs_buf_bio_end_io(
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
*/
- if (bio->bi_error)
- cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+ if (bio->bi_status) {
+ int error = blk_status_to_errno(bio->bi_status);
+
+ cmpxchg(&bp->b_io_error, 0, error);
+ }
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
@@ -1791,7 +1811,6 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
- btp->bt_bdi = blk_get_backing_dev_info(bdev);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
@@ -1815,6 +1834,28 @@ error:
}
/*
+ * Cancel a delayed write list.
+ *
+ * Remove each buffer from the list, clear the delwri queue flag and drop the
+ * associated buffer reference.
+ */
+void
+xfs_buf_delwri_cancel(
+ struct list_head *list)
+{
+ struct xfs_buf *bp;
+
+ while (!list_empty(list)) {
+ bp = list_first_entry(list, struct xfs_buf, b_list);
+
+ xfs_buf_lock(bp);
+ bp->b_flags &= ~_XBF_DELWRI_Q;
+ list_del_init(&bp->b_list);
+ xfs_buf_relse(bp);
+ }
+}
+
+/*
* Add a buffer to the delayed write list.
*
* This queues a buffer for writeout if it hasn't already been. Note that
@@ -2009,6 +2050,66 @@ xfs_buf_delwri_submit(
return error;
}
+/*
+ * Push a single buffer on a delwri queue.
+ *
+ * The purpose of this function is to submit a single buffer of a delwri queue
+ * and return with the buffer still on the original queue. The waiting delwri
+ * buffer submission infrastructure guarantees transfer of the delwri queue
+ * buffer reference to a temporary wait list. We reuse this infrastructure to
+ * transfer the buffer back to the original queue.
+ *
+ * Note the buffer transitions from the queued state, to the submitted and wait
+ * listed state and back to the queued state during this call. The buffer
+ * locking and queue management logic between _delwri_pushbuf() and
+ * _delwri_queue() guarantee that the buffer cannot be queued to another list
+ * before returning.
+ */
+int
+xfs_buf_delwri_pushbuf(
+ struct xfs_buf *bp,
+ struct list_head *buffer_list)
+{
+ LIST_HEAD (submit_list);
+ int error;
+
+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+ trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
+
+ /*
+ * Isolate the buffer to a new local list so we can submit it for I/O
+ * independently from the rest of the original list.
+ */
+ xfs_buf_lock(bp);
+ list_move(&bp->b_list, &submit_list);
+ xfs_buf_unlock(bp);
+
+ /*
+ * Delwri submission clears the DELWRI_Q buffer flag and returns with
+ * the buffer on the wait list with an associated reference. Rather than
+ * bounce the buffer from a local wait list back to the original list
+ * after I/O completion, reuse the original list as the wait list.
+ */
+ xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
+
+ /*
+ * The buffer is now under I/O and wait listed as during typical delwri
+ * submission. Lock the buffer to wait for I/O completion. Rather than
+ * remove the buffer from the wait list and release the reference, we
+ * want to return with the buffer queued to the original list. The
+ * buffer already sits on the original list with a wait list reference,
+ * however. If we let the queue inherit that wait list reference, all we
+ * need to do is reset the DELWRI_Q flag.
+ */
+ xfs_buf_lock(bp);
+ error = bp->b_error;
+ bp->b_flags |= _XBF_DELWRI_Q;
+ xfs_buf_unlock(bp);
+
+ return error;
+}
+
int __init
xfs_buf_init(void)
{
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 8a9d3a9599f0..20721261dae5 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -63,7 +63,6 @@ typedef enum {
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
#define _XBF_COMPOUND (1 << 23)/* compound buffer */
-#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */
typedef unsigned int xfs_buf_flags_t;
@@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_COMPOUND, "COMPOUND" }, \
- { _XBF_IN_FLIGHT, "IN_FLIGHT" }
+ { _XBF_COMPOUND, "COMPOUND" }
/*
* Internal state flags.
*/
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
+#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
/*
* The xfs_buftarg contains 2 notions of "sector size" -
@@ -109,7 +108,6 @@ typedef unsigned int xfs_buf_flags_t;
typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
- struct backing_dev_info *bt_bdi;
struct xfs_mount *bt_mount;
unsigned int bt_meta_sectorsize;
size_t bt_meta_sectormask;
@@ -292,7 +290,6 @@ xfs_buf_readahead(
return xfs_buf_readahead_map(target, &map, 1, ops);
}
-struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks);
void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
@@ -331,9 +328,11 @@ extern void *xfs_buf_offset(struct xfs_buf *, size_t);
extern void xfs_buf_stale(struct xfs_buf *bp);
/* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_cancel(struct list_head *);
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
+extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
/* Buffer Daemon Setup Routines */
extern int xfs_buf_init(void);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2975cb2319f4..f6a8422e9562 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -636,20 +636,23 @@ xfs_buf_item_unlock(
/*
* Clean buffers, by definition, cannot be in the AIL. However, aborted
- * buffers may be dirty and hence in the AIL. Therefore if we are
- * aborting a buffer and we've just taken the last refernce away, we
- * have to check if it is in the AIL before freeing it. We need to free
- * it in this case, because an aborted transaction has already shut the
- * filesystem down and this is the last chance we will have to do so.
+ * buffers may be in the AIL regardless of dirty state. An aborted
+ * transaction that invalidates a buffer already in the AIL may have
+ * marked it stale and cleared the dirty state, for example.
+ *
+ * Therefore if we are aborting a buffer and we've just taken the last
+ * reference away, we have to check if it is in the AIL before freeing
+ * it. We need to free it in this case, because an aborted transaction
+ * has already shut the filesystem down and this is the last chance we
+ * will have to do so.
*/
if (atomic_dec_and_test(&bip->bli_refcount)) {
- if (clean)
- xfs_buf_item_relse(bp);
- else if (aborted) {
+ if (aborted) {
ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
- }
+ } else if (clean)
+ xfs_buf_item_relse(bp);
}
if (!(flags & XFS_BLI_HOLD))
@@ -1162,6 +1165,7 @@ xfs_buf_iodone_callbacks(
*/
bp->b_last_error = 0;
bp->b_retries = 0;
+ bp->b_first_retry_time = 0;
xfs_buf_do_callbacks(bp);
bp->b_fspriv = NULL;
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 003a99b83bd8..ba2638d37031 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -44,7 +44,7 @@ static unsigned char xfs_dir3_filetype_table[] = {
static unsigned char
xfs_dir3_get_dtype(
struct xfs_mount *mp,
- __uint8_t filetype)
+ uint8_t filetype)
{
if (!xfs_sb_version_hasftype(&mp->m_sb))
return DT_UNKNOWN;
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
struct xfs_da_geometry *geo = args->geo;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Give up if the directory is way too short.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
-
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
- return -EFSCORRUPTED;
-
/*
* If the block number in the offset is out of range, we're done.
*/
@@ -128,7 +117,7 @@ xfs_dir2_sf_getdents(
*/
sfep = xfs_dir2_sf_firstentry(sfp);
for (i = 0; i < sfp->count; i++) {
- __uint8_t filetype;
+ uint8_t filetype;
off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
xfs_dir2_sf_get_offset(sfep));
@@ -181,7 +170,7 @@ xfs_dir2_block_getdents(
return 0;
lock_mode = xfs_ilock_data_map_shared(dp);
- error = xfs_dir3_block_read(NULL, dp, &bp);
+ error = xfs_dir3_block_read(args->trans, dp, &bp);
xfs_iunlock(dp, lock_mode);
if (error)
return error;
@@ -205,7 +194,7 @@ xfs_dir2_block_getdents(
* Each object is a real entry (dep) or an unused one (dup).
*/
while (ptr < endptr) {
- __uint8_t filetype;
+ uint8_t filetype;
dup = (xfs_dir2_data_unused_t *)ptr;
/*
@@ -239,7 +228,7 @@ xfs_dir2_block_getdents(
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
be64_to_cpu(dep->inumber),
xfs_dir3_get_dtype(dp->i_mount, filetype))) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return 0;
}
}
@@ -250,211 +239,104 @@ xfs_dir2_block_getdents(
*/
ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
0x7fffffff;
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return 0;
}
-struct xfs_dir2_leaf_map_info {
- xfs_extlen_t map_blocks; /* number of fsbs in map */
- xfs_dablk_t map_off; /* last mapped file offset */
- int map_size; /* total entries in *map */
- int map_valid; /* valid entries in *map */
- int nmap; /* mappings to ask xfs_bmapi */
- xfs_dir2_db_t curdb; /* db for current block */
- int ra_current; /* number of read-ahead blks */
- int ra_index; /* *map index for read-ahead */
- int ra_offset; /* map entry offset for ra */
- int ra_want; /* readahead count wanted */
- struct xfs_bmbt_irec map[]; /* map vector for blocks */
-};
-
+/*
+ * Read a directory block and initiate readahead for blocks beyond that.
+ * We maintain a sliding readahead window of the remaining space in the
+ * buffer rounded up to the nearest block.
+ */
STATIC int
xfs_dir2_leaf_readbuf(
struct xfs_da_args *args,
size_t bufsize,
- struct xfs_dir2_leaf_map_info *mip,
- xfs_dir2_off_t *curoff,
- struct xfs_buf **bpp,
- bool trim_map)
+ xfs_dir2_off_t *cur_off,
+ xfs_dablk_t *ra_blk,
+ struct xfs_buf **bpp)
{
struct xfs_inode *dp = args->dp;
struct xfs_buf *bp = NULL;
- struct xfs_bmbt_irec *map = mip->map;
+ struct xfs_da_geometry *geo = args->geo;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
+ struct xfs_bmbt_irec map;
struct blk_plug plug;
+ xfs_dir2_off_t new_off;
+ xfs_dablk_t next_ra;
+ xfs_dablk_t map_off;
+ xfs_dablk_t last_da;
+ xfs_extnum_t idx;
+ int ra_want;
int error = 0;
- int length;
- int i;
- int j;
- struct xfs_da_geometry *geo = args->geo;
-
- /*
- * If the caller just finished processing a buffer, it will tell us
- * we need to trim that block out of the mapping now it is done.
- */
- if (trim_map) {
- mip->map_blocks -= geo->fsbcount;
- /*
- * Loop to get rid of the extents for the
- * directory block.
- */
- for (i = geo->fsbcount; i > 0; ) {
- j = min_t(int, map->br_blockcount, i);
- map->br_blockcount -= j;
- map->br_startblock += j;
- map->br_startoff += j;
- /*
- * If mapping is done, pitch it from
- * the table.
- */
- if (!map->br_blockcount && --mip->map_valid)
- memmove(&map[0], &map[1],
- sizeof(map[0]) * mip->map_valid);
- i -= j;
- }
- }
-
- /*
- * Recalculate the readahead blocks wanted.
- */
- mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1;
- ASSERT(mip->ra_want >= 0);
-
- /*
- * If we don't have as many as we want, and we haven't
- * run out of data blocks, get some more mappings.
- */
- if (1 + mip->ra_want > mip->map_blocks &&
- mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) {
- /*
- * Get more bmaps, fill in after the ones
- * we already have in the table.
- */
- mip->nmap = mip->map_size - mip->map_valid;
- error = xfs_bmapi_read(dp, mip->map_off,
- xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) -
- mip->map_off,
- &map[mip->map_valid], &mip->nmap, 0);
- /*
- * Don't know if we should ignore this or try to return an
- * error. The trouble with returning errors is that readdir
- * will just stop without actually passing the error through.
- */
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(args->trans, dp, XFS_DATA_FORK);
if (error)
- goto out; /* XXX */
-
- /*
- * If we got all the mappings we asked for, set the final map
- * offset based on the last bmap value received. Otherwise,
- * we've reached the end.
- */
- if (mip->nmap == mip->map_size - mip->map_valid) {
- i = mip->map_valid + mip->nmap - 1;
- mip->map_off = map[i].br_startoff + map[i].br_blockcount;
- } else
- mip->map_off = xfs_dir2_byte_to_da(geo,
- XFS_DIR2_LEAF_OFFSET);
-
- /*
- * Look for holes in the mapping, and eliminate them. Count up
- * the valid blocks.
- */
- for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
- if (map[i].br_startblock == HOLESTARTBLOCK) {
- mip->nmap--;
- length = mip->map_valid + mip->nmap - i;
- if (length)
- memmove(&map[i], &map[i + 1],
- sizeof(map[i]) * length);
- } else {
- mip->map_blocks += map[i].br_blockcount;
- i++;
- }
- }
- mip->map_valid += mip->nmap;
+ goto out;
}
/*
- * No valid mappings, so no more data blocks.
+ * Look for mapped directory blocks at or above the current offset.
+ * Truncate down to the nearest directory block to start the scanning
+ * operation.
*/
- if (!mip->map_valid) {
- *curoff = xfs_dir2_da_to_byte(geo, mip->map_off);
+ last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
+ map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off));
+ if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map))
goto out;
- }
+ if (map.br_startoff >= last_da)
+ goto out;
+ xfs_trim_extent(&map, map_off, last_da - map_off);
- /*
- * Read the directory block starting at the first mapping.
- */
- mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);
- error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
- map->br_blockcount >= geo->fsbcount ?
- XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) :
- -1, &bp);
- /*
- * Should just skip over the data block instead of giving up.
- */
+ /* Read the directory block of that first mapping. */
+ new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
+ if (new_off > *cur_off)
+ *cur_off = new_off;
+ error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp);
if (error)
- goto out; /* XXX */
-
- /*
- * Adjust the current amount of read-ahead: we just read a block that
- * was previously ra.
- */
- if (mip->ra_current)
- mip->ra_current -= geo->fsbcount;
+ goto out;
/*
- * Do we need more readahead?
+ * Start readahead for the next bufsize's worth of dir data blocks.
+ * We may have already issued readahead for some of that range;
+ * ra_blk tracks the last block we tried to read(ahead).
*/
+ ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
+ if (*ra_blk >= last_da)
+ goto out;
+ else if (*ra_blk == 0)
+ *ra_blk = map.br_startoff;
+ next_ra = map.br_startoff + geo->fsbcount;
+ if (next_ra >= last_da)
+ goto out_no_ra;
+ if (map.br_blockcount < geo->fsbcount &&
+ !xfs_iext_get_extent(ifp, ++idx, &map))
+ goto out_no_ra;
+ if (map.br_startoff >= last_da)
+ goto out_no_ra;
+ xfs_trim_extent(&map, next_ra, last_da - next_ra);
+
+ /* Start ra for each dir (not fs) block that has a mapping. */
blk_start_plug(&plug);
- for (mip->ra_index = mip->ra_offset = i = 0;
- mip->ra_want > mip->ra_current && i < mip->map_blocks;
- i += geo->fsbcount) {
- ASSERT(mip->ra_index < mip->map_valid);
- /*
- * Read-ahead a contiguous directory block.
- */
- if (i > mip->ra_current &&
- map[mip->ra_index].br_blockcount >= geo->fsbcount) {
- xfs_dir3_data_readahead(dp,
- map[mip->ra_index].br_startoff + mip->ra_offset,
- XFS_FSB_TO_DADDR(dp->i_mount,
- map[mip->ra_index].br_startblock +
- mip->ra_offset));
- mip->ra_current = i;
- }
-
- /*
- * Read-ahead a non-contiguous directory block. This doesn't
- * use our mapping, but this is a very rare case.
- */
- else if (i > mip->ra_current) {
- xfs_dir3_data_readahead(dp,
- map[mip->ra_index].br_startoff +
- mip->ra_offset, -1);
- mip->ra_current = i;
- }
-
- /*
- * Advance offset through the mapping table.
- */
- for (j = 0; j < geo->fsbcount; j += length ) {
- /*
- * The rest of this extent but not more than a dir
- * block.
- */
- length = min_t(int, geo->fsbcount,
- map[mip->ra_index].br_blockcount -
- mip->ra_offset);
- mip->ra_offset += length;
-
- /*
- * Advance to the next mapping if this one is used up.
- */
- if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
- mip->ra_offset = 0;
- mip->ra_index++;
+ while (ra_want > 0) {
+ next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount);
+ while (ra_want > 0 &&
+ next_ra < map.br_startoff + map.br_blockcount) {
+ if (next_ra >= last_da) {
+ *ra_blk = last_da;
+ break;
+ }
+ if (next_ra > *ra_blk) {
+ xfs_dir3_data_readahead(dp, next_ra, -2);
+ *ra_blk = next_ra;
}
+ ra_want -= geo->fsbcount;
+ next_ra += geo->fsbcount;
+ }
+ if (!xfs_iext_get_extent(ifp, ++idx, &map)) {
+ *ra_blk = last_da;
+ break;
}
}
blk_finish_plug(&plug);
@@ -462,6 +344,9 @@ xfs_dir2_leaf_readbuf(
out:
*bpp = bp;
return error;
+out_no_ra:
+ *ra_blk = last_da;
+ goto out;
}
/*
@@ -479,14 +364,14 @@ xfs_dir2_leaf_getdents(
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
- int error = 0; /* error return value */
- int length; /* temporary length value */
- int byteoff; /* offset in current block */
- xfs_dir2_off_t curoff; /* current overall offset */
- xfs_dir2_off_t newoff; /* new curoff after new blk */
char *ptr = NULL; /* pointer to current data */
- struct xfs_dir2_leaf_map_info *map_info;
struct xfs_da_geometry *geo = args->geo;
+ xfs_dablk_t rablk = 0; /* current readahead block */
+ xfs_dir2_off_t curoff; /* current overall offset */
+ int length; /* temporary length value */
+ int byteoff; /* offset in current block */
+ int lock_mode;
+ int error = 0; /* error return value */
/*
* If the offset is at or past the largest allowed value,
@@ -496,73 +381,35 @@ xfs_dir2_leaf_getdents(
return 0;
/*
- * Set up to bmap a number of blocks based on the caller's
- * buffer size, the directory block size, and the filesystem
- * block size.
- */
- length = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
- map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
- (length * sizeof(struct xfs_bmbt_irec)),
- KM_SLEEP | KM_NOFS);
- map_info->map_size = length;
-
- /*
* Inside the loop we keep the main offset value as a byte offset
* in the directory file.
*/
curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
/*
- * Force this conversion through db so we truncate the offset
- * down to get the start of the data block.
- */
- map_info->map_off = xfs_dir2_db_to_da(geo,
- xfs_dir2_byte_to_db(geo, curoff));
-
- /*
* Loop over directory entries until we reach the end offset.
* Get more blocks and readahead as necessary.
*/
while (curoff < XFS_DIR2_LEAF_OFFSET) {
- __uint8_t filetype;
+ uint8_t filetype;
/*
* If we have no buffer, or we're off the end of the
* current buffer, need to get another one.
*/
if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
- int lock_mode;
- bool trim_map = false;
-
if (bp) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
bp = NULL;
- trim_map = true;
}
lock_mode = xfs_ilock_data_map_shared(dp);
- error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
- &curoff, &bp, trim_map);
+ error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
+ &rablk, &bp);
xfs_iunlock(dp, lock_mode);
- if (error || !map_info->map_valid)
+ if (error || !bp)
break;
- /*
- * Having done a read, we need to set a new offset.
- */
- newoff = xfs_dir2_db_off_to_byte(geo,
- map_info->curdb, 0);
- /*
- * Start of the current block.
- */
- if (curoff < newoff)
- curoff = newoff;
- /*
- * Make sure we're in the right block.
- */
- else if (curoff > newoff)
- ASSERT(xfs_dir2_byte_to_db(geo, curoff) ==
- map_info->curdb);
hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
/*
@@ -647,17 +494,22 @@ xfs_dir2_leaf_getdents(
ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
else
ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
- kmem_free(map_info);
if (bp)
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return error;
}
/*
* Read a directory.
+ *
+ * If supplied, the transaction collects locked dir buffers to avoid
+ * nested buffer deadlocks. This function does not dirty the
+ * transaction. The caller should ensure that the inode is locked
+ * before calling this function.
*/
int
xfs_readdir(
+ struct xfs_trans *tp,
struct xfs_inode *dp,
struct dir_context *ctx,
size_t bufsize)
@@ -676,6 +528,7 @@ xfs_readdir(
args.dp = dp;
args.geo = dp->i_mount->m_dir_geo;
+ args.trans = tp;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_getdents(&args, ctx);
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 4ff499aa7338..b2cde5426182 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -39,7 +39,7 @@ xfs_trim_extents(
xfs_daddr_t start,
xfs_daddr_t end,
xfs_daddr_t minlen,
- __uint64_t *blocks_trimmed)
+ uint64_t *blocks_trimmed)
{
struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
struct xfs_btree_cur *cur;
@@ -132,6 +132,11 @@ next_extent:
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto out_del_cursor;
+
+ if (fatal_signal_pending(current)) {
+ error = -ERESTARTSYS;
+ goto out_del_cursor;
+ }
}
out_del_cursor:
@@ -161,7 +166,7 @@ xfs_ioc_trim(
struct fstrim_range range;
xfs_daddr_t start, end, minlen;
xfs_agnumber_t start_agno, end_agno, agno;
- __uint64_t blocks_trimmed = 0;
+ uint64_t blocks_trimmed = 0;
int error, last_error = 0;
if (!capable(CAP_SYS_ADMIN))
@@ -196,8 +201,11 @@ xfs_ioc_trim(
for (agno = start_agno; agno <= end_agno; agno++) {
error = xfs_trim_extents(mp, agno, start, end, minlen,
&blocks_trimmed);
- if (error)
+ if (error) {
last_error = error;
+ if (error == -ERESTARTSYS)
+ break;
+ }
}
if (last_error)
@@ -208,32 +216,3 @@ xfs_ioc_trim(
return -EFAULT;
return 0;
}
-
-int
-xfs_discard_extents(
- struct xfs_mount *mp,
- struct list_head *list)
-{
- struct xfs_extent_busy *busyp;
- int error = 0;
-
- list_for_each_entry(busyp, list, list) {
- trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
- busyp->length);
-
- error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
- XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
- XFS_FSB_TO_BB(mp, busyp->length),
- GFP_NOFS, 0);
- if (error && error != -EOPNOTSUPP) {
- xfs_info(mp,
- "discard failed for extent [0x%llx,%u], error %d",
- (unsigned long long)busyp->bno,
- busyp->length,
- error);
- return error;
- }
- }
-
- return 0;
-}
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
index 344879aea646..0f070f9e44e1 100644
--- a/fs/xfs/xfs_discard.h
+++ b/fs/xfs/xfs_discard.h
@@ -5,6 +5,5 @@ struct fstrim_range;
struct list_head;
extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
-extern int xfs_discard_extents(struct xfs_mount *, struct list_head *);
#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9d06cc30e875..fd2ef8c2c9a7 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -276,7 +276,7 @@ xfs_qm_init_dquot_blk(
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
- __uint64_t space;
+ uint64_t space;
dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
@@ -695,21 +695,18 @@ error0:
*/
static int
xfs_dq_get_next_id(
- xfs_mount_t *mp,
+ struct xfs_mount *mp,
uint type,
- xfs_dqid_t *id,
- loff_t eof)
+ xfs_dqid_t *id)
{
- struct xfs_inode *quotip;
+ struct xfs_inode *quotip = xfs_quota_inode(mp, type);
+ xfs_dqid_t next_id = *id + 1; /* simple advance */
+ uint lock_flags;
+ struct xfs_bmbt_irec got;
+ xfs_extnum_t idx;
xfs_fsblock_t start;
- loff_t offset;
- uint lock;
- xfs_dqid_t next_id;
int error = 0;
- /* Simple advance */
- next_id = *id + 1;
-
/* If we'd wrap past the max ID, stop */
if (next_id < *id)
return -ENOENT;
@@ -723,23 +720,25 @@ xfs_dq_get_next_id(
/* Nope, next_id is now past the current chunk, so find the next one */
start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
- quotip = xfs_quota_inode(mp, type);
- lock = xfs_ilock_data_map_shared(quotip);
-
- offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
- eof, SEEK_DATA);
- if (offset < 0)
- error = offset;
+ lock_flags = xfs_ilock_data_map_shared(quotip);
+ if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
- xfs_iunlock(quotip, lock);
+ if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got)) {
+ /* contiguous chunk, bump startoff for the id calculation */
+ if (got.br_startoff < start)
+ got.br_startoff = start;
+ *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
+ } else {
+ error = -ENOENT;
+ }
- /* -ENXIO is essentially "no more data" */
- if (error)
- return (error == -ENXIO ? -ENOENT: error);
+ xfs_iunlock(quotip, lock_flags);
- /* Convert next data offset back to a quota id */
- *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
- return 0;
+ return error;
}
/*
@@ -762,7 +761,6 @@ xfs_qm_dqget(
struct xfs_quotainfo *qi = mp->m_quotainfo;
struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
struct xfs_dquot *dqp;
- loff_t eof = 0;
int error;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -790,21 +788,6 @@ xfs_qm_dqget(
}
#endif
- /* Get the end of the quota file if we need it */
- if (flags & XFS_QMOPT_DQNEXT) {
- struct xfs_inode *quotip;
- xfs_fileoff_t last;
- uint lock_mode;
-
- quotip = xfs_quota_inode(mp, type);
- lock_mode = xfs_ilock_data_map_shared(quotip);
- error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
- xfs_iunlock(quotip, lock_mode);
- if (error)
- return error;
- eof = XFS_FSB_TO_B(mp, last);
- }
-
restart:
mutex_lock(&qi->qi_tree_lock);
dqp = radix_tree_lookup(tree, id);
@@ -823,7 +806,7 @@ restart:
if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
xfs_dqunlock(dqp);
mutex_unlock(&qi->qi_tree_lock);
- error = xfs_dq_get_next_id(mp, type, &id, eof);
+ error = xfs_dq_get_next_id(mp, type, &id);
if (error)
return error;
goto restart;
@@ -858,7 +841,7 @@ restart:
/* If we are asked to find next active id, keep looking */
if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
- error = xfs_dq_get_next_id(mp, type, &id, eof);
+ error = xfs_dq_get_next_id(mp, type, &id);
if (!error)
goto restart;
}
@@ -917,7 +900,7 @@ restart:
if (flags & XFS_QMOPT_DQNEXT) {
if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
xfs_qm_dqput(dqp);
- error = xfs_dq_get_next_id(mp, type, &id, eof);
+ error = xfs_dq_get_next_id(mp, type, &id);
if (error)
return error;
goto restart;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed7ee4e8af73..2f4feb959bfb 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -22,103 +22,280 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_error.h"
+#include "xfs_sysfs.h"
#ifdef DEBUG
-int xfs_etest[XFS_NUM_INJECT_ERROR];
-int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
-char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
-int xfs_error_test_active;
+static unsigned int xfs_errortag_random_default[] = {
+ XFS_RANDOM_DEFAULT,
+ XFS_RANDOM_IFLUSH_1,
+ XFS_RANDOM_IFLUSH_2,
+ XFS_RANDOM_IFLUSH_3,
+ XFS_RANDOM_IFLUSH_4,
+ XFS_RANDOM_IFLUSH_5,
+ XFS_RANDOM_IFLUSH_6,
+ XFS_RANDOM_DA_READ_BUF,
+ XFS_RANDOM_BTREE_CHECK_LBLOCK,
+ XFS_RANDOM_BTREE_CHECK_SBLOCK,
+ XFS_RANDOM_ALLOC_READ_AGF,
+ XFS_RANDOM_IALLOC_READ_AGI,
+ XFS_RANDOM_ITOBP_INOTOBP,
+ XFS_RANDOM_IUNLINK,
+ XFS_RANDOM_IUNLINK_REMOVE,
+ XFS_RANDOM_DIR_INO_VALIDATE,
+ XFS_RANDOM_BULKSTAT_READ_CHUNK,
+ XFS_RANDOM_IODONE_IOERR,
+ XFS_RANDOM_STRATREAD_IOERR,
+ XFS_RANDOM_STRATCMPL_IOERR,
+ XFS_RANDOM_DIOWRITE_IOERR,
+ XFS_RANDOM_BMAPIFORMAT,
+ XFS_RANDOM_FREE_EXTENT,
+ XFS_RANDOM_RMAP_FINISH_ONE,
+ XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE,
+ XFS_RANDOM_REFCOUNT_FINISH_ONE,
+ XFS_RANDOM_BMAP_FINISH_ONE,
+ XFS_RANDOM_AG_RESV_CRITICAL,
+ XFS_RANDOM_DROP_WRITES,
+ XFS_RANDOM_LOG_BAD_CRC,
+};
-int
-xfs_error_test(int error_tag, int *fsidp, char *expression,
- int line, char *file, unsigned long randfactor)
+struct xfs_errortag_attr {
+ struct attribute attr;
+ unsigned int tag;
+};
+
+static inline struct xfs_errortag_attr *
+to_attr(struct attribute *attr)
{
- int i;
- int64_t fsid;
+ return container_of(attr, struct xfs_errortag_attr, attr);
+}
- if (prandom_u32() % randfactor)
- return 0;
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
- memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
+ return container_of(kobj, struct xfs_mount, m_errortag_kobj);
+}
+
+STATIC ssize_t
+xfs_errortag_attr_store(
+ struct kobject *kobject,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+ struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+ int ret;
+ unsigned int val;
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
- xfs_warn(NULL,
- "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
- expression, file, line, xfs_etest_fsname[i]);
- return 1;
- }
+ if (strcmp(buf, "default") == 0) {
+ val = xfs_errortag_random_default[xfs_attr->tag];
+ } else {
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
}
- return 0;
+ ret = xfs_errortag_set(mp, xfs_attr->tag, val);
+ if (ret)
+ return ret;
+ return count;
}
+STATIC ssize_t
+xfs_errortag_attr_show(
+ struct kobject *kobject,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+ struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ xfs_errortag_get(mp, xfs_attr->tag));
+}
+
+static const struct sysfs_ops xfs_errortag_sysfs_ops = {
+ .show = xfs_errortag_attr_show,
+ .store = xfs_errortag_attr_store,
+};
+
+#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \
+static struct xfs_errortag_attr xfs_errortag_attr_##_name = { \
+ .attr = {.name = __stringify(_name), \
+ .mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) }, \
+ .tag = (_tag), \
+}
+
+#define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr
+
+XFS_ERRORTAG_ATTR_RW(noerror, XFS_ERRTAG_NOERROR);
+XFS_ERRORTAG_ATTR_RW(iflush1, XFS_ERRTAG_IFLUSH_1);
+XFS_ERRORTAG_ATTR_RW(iflush2, XFS_ERRTAG_IFLUSH_2);
+XFS_ERRORTAG_ATTR_RW(iflush3, XFS_ERRTAG_IFLUSH_3);
+XFS_ERRORTAG_ATTR_RW(iflush4, XFS_ERRTAG_IFLUSH_4);
+XFS_ERRORTAG_ATTR_RW(iflush5, XFS_ERRTAG_IFLUSH_5);
+XFS_ERRORTAG_ATTR_RW(iflush6, XFS_ERRTAG_IFLUSH_6);
+XFS_ERRORTAG_ATTR_RW(dareadbuf, XFS_ERRTAG_DA_READ_BUF);
+XFS_ERRORTAG_ATTR_RW(btree_chk_lblk, XFS_ERRTAG_BTREE_CHECK_LBLOCK);
+XFS_ERRORTAG_ATTR_RW(btree_chk_sblk, XFS_ERRTAG_BTREE_CHECK_SBLOCK);
+XFS_ERRORTAG_ATTR_RW(readagf, XFS_ERRTAG_ALLOC_READ_AGF);
+XFS_ERRORTAG_ATTR_RW(readagi, XFS_ERRTAG_IALLOC_READ_AGI);
+XFS_ERRORTAG_ATTR_RW(itobp, XFS_ERRTAG_ITOBP_INOTOBP);
+XFS_ERRORTAG_ATTR_RW(iunlink, XFS_ERRTAG_IUNLINK);
+XFS_ERRORTAG_ATTR_RW(iunlinkrm, XFS_ERRTAG_IUNLINK_REMOVE);
+XFS_ERRORTAG_ATTR_RW(dirinovalid, XFS_ERRTAG_DIR_INO_VALIDATE);
+XFS_ERRORTAG_ATTR_RW(bulkstat, XFS_ERRTAG_BULKSTAT_READ_CHUNK);
+XFS_ERRORTAG_ATTR_RW(logiodone, XFS_ERRTAG_IODONE_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratread, XFS_ERRTAG_STRATREAD_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratcmpl, XFS_ERRTAG_STRATCMPL_IOERR);
+XFS_ERRORTAG_ATTR_RW(diowrite, XFS_ERRTAG_DIOWRITE_IOERR);
+XFS_ERRORTAG_ATTR_RW(bmapifmt, XFS_ERRTAG_BMAPIFORMAT);
+XFS_ERRORTAG_ATTR_RW(free_extent, XFS_ERRTAG_FREE_EXTENT);
+XFS_ERRORTAG_ATTR_RW(rmap_finish_one, XFS_ERRTAG_RMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE);
+XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
+XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
+XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
+
+static struct attribute *xfs_errortag_attrs[] = {
+ XFS_ERRORTAG_ATTR_LIST(noerror),
+ XFS_ERRORTAG_ATTR_LIST(iflush1),
+ XFS_ERRORTAG_ATTR_LIST(iflush2),
+ XFS_ERRORTAG_ATTR_LIST(iflush3),
+ XFS_ERRORTAG_ATTR_LIST(iflush4),
+ XFS_ERRORTAG_ATTR_LIST(iflush5),
+ XFS_ERRORTAG_ATTR_LIST(iflush6),
+ XFS_ERRORTAG_ATTR_LIST(dareadbuf),
+ XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk),
+ XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk),
+ XFS_ERRORTAG_ATTR_LIST(readagf),
+ XFS_ERRORTAG_ATTR_LIST(readagi),
+ XFS_ERRORTAG_ATTR_LIST(itobp),
+ XFS_ERRORTAG_ATTR_LIST(iunlink),
+ XFS_ERRORTAG_ATTR_LIST(iunlinkrm),
+ XFS_ERRORTAG_ATTR_LIST(dirinovalid),
+ XFS_ERRORTAG_ATTR_LIST(bulkstat),
+ XFS_ERRORTAG_ATTR_LIST(logiodone),
+ XFS_ERRORTAG_ATTR_LIST(stratread),
+ XFS_ERRORTAG_ATTR_LIST(stratcmpl),
+ XFS_ERRORTAG_ATTR_LIST(diowrite),
+ XFS_ERRORTAG_ATTR_LIST(bmapifmt),
+ XFS_ERRORTAG_ATTR_LIST(free_extent),
+ XFS_ERRORTAG_ATTR_LIST(rmap_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(refcount_continue_update),
+ XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
+ XFS_ERRORTAG_ATTR_LIST(drop_writes),
+ XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
+ NULL,
+};
+
+struct kobj_type xfs_errortag_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_errortag_sysfs_ops,
+ .default_attrs = xfs_errortag_attrs,
+};
+
int
-xfs_errortag_add(unsigned int error_tag, xfs_mount_t *mp)
+xfs_errortag_init(
+ struct xfs_mount *mp)
{
- int i;
- int len;
- int64_t fsid;
+ mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
+ KM_SLEEP | KM_MAYFAIL);
+ if (!mp->m_errortag)
+ return -ENOMEM;
- if (error_tag >= XFS_ERRTAG_MAX)
- return -EINVAL;
+ return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+ &mp->m_kobj, "errortag");
+}
- memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
+void
+xfs_errortag_del(
+ struct xfs_mount *mp)
+{
+ xfs_sysfs_del(&mp->m_errortag_kobj);
+ kmem_free(mp->m_errortag);
+}
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
- xfs_warn(mp, "error tag #%d on", error_tag);
- return 0;
- }
- }
+bool
+xfs_errortag_test(
+ struct xfs_mount *mp,
+ const char *expression,
+ const char *file,
+ int line,
+ unsigned int error_tag)
+{
+ unsigned int randfactor;
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if (xfs_etest[i] == 0) {
- xfs_warn(mp, "Turned on XFS error tag #%d",
- error_tag);
- xfs_etest[i] = error_tag;
- xfs_etest_fsid[i] = fsid;
- len = strlen(mp->m_fsname);
- xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
- strcpy(xfs_etest_fsname[i], mp->m_fsname);
- xfs_error_test_active++;
- return 0;
- }
- }
+ /*
+ * To be able to use error injection anywhere, we need to ensure error
+ * injection mechanism is already initialized.
+ *
+ * Code paths like I/O completion can be called before the
+ * initialization is complete, but be able to inject errors in such
+ * places is still useful.
+ */
+ if (!mp->m_errortag)
+ return false;
- xfs_warn(mp, "error tag overflow, too many turned on");
+ ASSERT(error_tag < XFS_ERRTAG_MAX);
+ randfactor = mp->m_errortag[error_tag];
+ if (!randfactor || prandom_u32() % randfactor)
+ return false;
- return 1;
+ xfs_warn_ratelimited(mp,
+"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
+ expression, file, line, mp->m_fsname);
+ return true;
}
int
-xfs_errortag_clearall(xfs_mount_t *mp, int loud)
+xfs_errortag_get(
+ struct xfs_mount *mp,
+ unsigned int error_tag)
{
- int64_t fsid;
- int cleared = 0;
- int i;
-
- memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
-
-
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) &&
- xfs_etest[i] != 0) {
- cleared = 1;
- xfs_warn(mp, "Clearing XFS error tag #%d",
- xfs_etest[i]);
- xfs_etest[i] = 0;
- xfs_etest_fsid[i] = 0LL;
- kmem_free(xfs_etest_fsname[i]);
- xfs_etest_fsname[i] = NULL;
- xfs_error_test_active--;
- }
- }
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return -EINVAL;
+
+ return mp->m_errortag[error_tag];
+}
+
+int
+xfs_errortag_set(
+ struct xfs_mount *mp,
+ unsigned int error_tag,
+ unsigned int tag_value)
+{
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return -EINVAL;
- if (loud || cleared)
- xfs_warn(mp, "Cleared all XFS error tags for filesystem");
+ mp->m_errortag[error_tag] = tag_value;
+ return 0;
+}
+int
+xfs_errortag_add(
+ struct xfs_mount *mp,
+ unsigned int error_tag)
+{
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return -EINVAL;
+
+ return xfs_errortag_set(mp, error_tag,
+ xfs_errortag_random_default[error_tag]);
+}
+
+int
+xfs_errortag_clearall(
+ struct xfs_mount *mp)
+{
+ memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX);
return 0;
}
#endif /* DEBUG */
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 05f8666733a0..7577be5f09bc 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -96,7 +96,17 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25
#define XFS_ERRTAG_BMAP_FINISH_ONE 26
#define XFS_ERRTAG_AG_RESV_CRITICAL 27
-#define XFS_ERRTAG_MAX 28
+/*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes. When enabled, all
+ * buffered writes are silenty dropped and handled as if they failed.
+ * All delalloc blocks in the range of the write (including pre-existing
+ * delalloc blocks!) are tossed as part of the write failure error
+ * handling sequence.
+ */
+#define XFS_ERRTAG_DROP_WRITES 28
+#define XFS_ERRTAG_LOG_BAD_CRC 29
+#define XFS_ERRTAG_MAX 30
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -129,23 +139,29 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
#define XFS_RANDOM_BMAP_FINISH_ONE 1
#define XFS_RANDOM_AG_RESV_CRITICAL 4
+#define XFS_RANDOM_DROP_WRITES 1
+#define XFS_RANDOM_LOG_BAD_CRC 1
#ifdef DEBUG
-extern int xfs_error_test_active;
-extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
-
-#define XFS_NUM_INJECT_ERROR 10
-#define XFS_TEST_ERROR(expr, mp, tag, rf) \
- ((expr) || (xfs_error_test_active && \
- xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
- (rf))))
+extern int xfs_errortag_init(struct xfs_mount *mp);
+extern void xfs_errortag_del(struct xfs_mount *mp);
+extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
+ const char *file, int line, unsigned int error_tag);
+#define XFS_TEST_ERROR(expr, mp, tag) \
+ ((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
-extern int xfs_errortag_add(unsigned int error_tag, struct xfs_mount *mp);
-extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
+extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
+extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
+ unsigned int tag_value);
+extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
+extern int xfs_errortag_clearall(struct xfs_mount *mp);
#else
-#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr)
-#define xfs_errortag_add(tag, mp) (ENOSYS)
-#define xfs_errortag_clearall(mp, loud) (ENOSYS)
+#define xfs_errortag_init(mp) (0)
+#define xfs_errortag_del(mp)
+#define XFS_TEST_ERROR(expr, mp, tag) (expr)
+#define xfs_errortag_set(mp, tag, val) (ENOSYS)
+#define xfs_errortag_add(mp, tag) (ENOSYS)
+#define xfs_errortag_clearall(mp) (ENOSYS)
#endif /* DEBUG */
/*
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 162dc186cf04..77760dbf0242 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -45,18 +45,7 @@ xfs_extent_busy_insert(
struct rb_node **rbp;
struct rb_node *parent = NULL;
- new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
- if (!new) {
- /*
- * No Memory! Since it is now not possible to track the free
- * block, make this a synchronous transaction to insure that
- * the block is not reused before this transaction commits.
- */
- trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
- xfs_trans_set_sync(tp);
- return;
- }
-
+ new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
new->agno = agno;
new->bno = bno;
new->length = len;
@@ -345,25 +334,31 @@ restart:
* subset of the extent that is not busy. If *rlen is smaller than
* args->minlen no suitable extent could be found, and the higher level
* code needs to force out the log and retry the allocation.
+ *
+ * Return the current busy generation for the AG if the extent is busy. This
+ * value can be used to wait for at least one of the currently busy extents
+ * to be cleared. Note that the busy list is not guaranteed to be empty after
+ * the gen is woken. The state of a specific extent must always be confirmed
+ * with another call to xfs_extent_busy_trim() before it can be used.
*/
-void
+bool
xfs_extent_busy_trim(
struct xfs_alloc_arg *args,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- xfs_agblock_t *rbno,
- xfs_extlen_t *rlen)
+ xfs_agblock_t *bno,
+ xfs_extlen_t *len,
+ unsigned *busy_gen)
{
xfs_agblock_t fbno;
xfs_extlen_t flen;
struct rb_node *rbp;
+ bool ret = false;
- ASSERT(len > 0);
+ ASSERT(*len > 0);
spin_lock(&args->pag->pagb_lock);
restart:
- fbno = bno;
- flen = len;
+ fbno = *bno;
+ flen = *len;
rbp = args->pag->pagb_tree.rb_node;
while (rbp && flen >= args->minlen) {
struct xfs_extent_busy *busyp =
@@ -515,24 +510,25 @@ restart:
flen = fend - fbno;
}
- spin_unlock(&args->pag->pagb_lock);
+out:
- if (fbno != bno || flen != len) {
- trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
+ if (fbno != *bno || flen != *len) {
+ trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len,
fbno, flen);
+ *bno = fbno;
+ *len = flen;
+ *busy_gen = args->pag->pagb_gen;
+ ret = true;
}
- *rbno = fbno;
- *rlen = flen;
- return;
+ spin_unlock(&args->pag->pagb_lock);
+ return ret;
fail:
/*
* Return a zero extent length as failure indications. All callers
* re-check if the trimmed extent satisfies the minlen requirement.
*/
- spin_unlock(&args->pag->pagb_lock);
- trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
- *rbno = fbno;
- *rlen = 0;
+ flen = 0;
+ goto out;
}
STATIC void
@@ -551,6 +547,21 @@ xfs_extent_busy_clear_one(
kmem_free(busyp);
}
+static void
+xfs_extent_busy_put_pag(
+ struct xfs_perag *pag,
+ bool wakeup)
+ __releases(pag->pagb_lock)
+{
+ if (wakeup) {
+ pag->pagb_gen++;
+ wake_up_all(&pag->pagb_wait);
+ }
+
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
+}
+
/*
* Remove all extents on the passed in list from the busy extents tree.
* If do_discard is set skip extents that need to be discarded, and mark
@@ -565,27 +576,76 @@ xfs_extent_busy_clear(
struct xfs_extent_busy *busyp, *n;
struct xfs_perag *pag = NULL;
xfs_agnumber_t agno = NULLAGNUMBER;
+ bool wakeup = false;
list_for_each_entry_safe(busyp, n, list, list) {
if (busyp->agno != agno) {
- if (pag) {
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
- }
- pag = xfs_perag_get(mp, busyp->agno);
- spin_lock(&pag->pagb_lock);
+ if (pag)
+ xfs_extent_busy_put_pag(pag, wakeup);
agno = busyp->agno;
+ pag = xfs_perag_get(mp, agno);
+ spin_lock(&pag->pagb_lock);
+ wakeup = false;
}
if (do_discard && busyp->length &&
- !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
+ !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
- else
+ } else {
xfs_extent_busy_clear_one(mp, pag, busyp);
+ wakeup = true;
+ }
}
- if (pag) {
- spin_unlock(&pag->pagb_lock);
+ if (pag)
+ xfs_extent_busy_put_pag(pag, wakeup);
+}
+
+/*
+ * Flush out all busy extents for this AG.
+ */
+void
+xfs_extent_busy_flush(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ unsigned busy_gen)
+{
+ DEFINE_WAIT (wait);
+ int log_flushed = 0, error;
+
+ trace_xfs_log_force(mp, 0, _THIS_IP_);
+ error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
+ if (error)
+ return;
+
+ do {
+ prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
+ if (busy_gen != READ_ONCE(pag->pagb_gen))
+ break;
+ schedule();
+ } while (1);
+
+ finish_wait(&pag->pagb_wait, &wait);
+}
+
+void
+xfs_extent_busy_wait_all(
+ struct xfs_mount *mp)
+{
+ DEFINE_WAIT (wait);
+ xfs_agnumber_t agno;
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ struct xfs_perag *pag = xfs_perag_get(mp, agno);
+
+ do {
+ prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
+ if (RB_EMPTY_ROOT(&pag->pagb_tree))
+ break;
+ schedule();
+ } while (1);
+ finish_wait(&pag->pagb_wait, &wait);
+
xfs_perag_put(pag);
}
}
@@ -596,9 +656,17 @@ xfs_extent_busy_clear(
int
xfs_extent_busy_ag_cmp(
void *priv,
- struct list_head *a,
- struct list_head *b)
+ struct list_head *l1,
+ struct list_head *l2)
{
- return container_of(a, struct xfs_extent_busy, list)->agno -
- container_of(b, struct xfs_extent_busy, list)->agno;
+ struct xfs_extent_busy *b1 =
+ container_of(l1, struct xfs_extent_busy, list);
+ struct xfs_extent_busy *b2 =
+ container_of(l2, struct xfs_extent_busy, list);
+ s32 diff;
+
+ diff = b1->agno - b2->agno;
+ if (!diff)
+ diff = b1->bno - b2->bno;
+ return diff;
}
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index bfff284d2dcc..60195ea1b84a 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -58,9 +58,16 @@ void
xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
+bool
+xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
+ xfs_extlen_t *len, unsigned *busy_gen);
+
+void
+xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag,
+ unsigned busy_gen);
+
void
-xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno,
- xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen);
+xfs_extent_busy_wait_all(struct xfs_mount *mp);
int
xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d7bc14906af8..44f8c5451210 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -290,6 +290,7 @@ void
xfs_efi_release(
struct xfs_efi_log_item *efip)
{
+ ASSERT(atomic_read(&efip->efi_refcount) > 0);
if (atomic_dec_and_test(&efip->efi_refcount)) {
xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR);
xfs_efi_item_free(efip);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bbb9eb6811b2..c4893e226fd8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -140,7 +140,7 @@ xfs_file_fsync(
trace_xfs_file_fsync(ip);
- error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ error = file_write_and_wait_range(file, start, end);
if (error)
return error;
@@ -237,7 +237,11 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -527,12 +531,25 @@ xfs_file_dio_aio_write(
if ((iocb->ki_pos & mp->m_blockmask) ||
((iocb->ki_pos + count) & mp->m_blockmask)) {
unaligned_io = 1;
+
+ /*
+ * We can't properly handle unaligned direct I/O to reflink
+ * files yet, as we can't unshare a partial block.
+ */
+ if (xfs_is_reflink_inode(ip)) {
+ trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
+ return -EREMCHG;
+ }
iolock = XFS_IOLOCK_EXCL;
} else {
iolock = XFS_IOLOCK_SHARED;
}
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
@@ -544,22 +561,20 @@ xfs_file_dio_aio_write(
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
- if (unaligned_io)
- inode_dio_wait(inode);
- else if (iolock == XFS_IOLOCK_EXCL) {
+ if (unaligned_io) {
+ /* If we are going to wait for other DIO to finish, bail */
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (atomic_read(&inode->i_dio_count))
+ return -EAGAIN;
+ } else {
+ inode_dio_wait(inode);
+ }
+ } else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-
- /* If this is a block-aligned directio CoW, remap immediately. */
- if (xfs_is_reflink_inode(ip) && !unaligned_io) {
- ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
- if (ret)
- goto out;
- }
-
ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
xfs_iunlock(ip, iolock);
@@ -584,7 +599,12 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
+
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -614,8 +634,10 @@ xfs_file_buffered_aio_write(
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
int enospc = 0;
- int iolock = XFS_IOLOCK_EXCL;
+ int iolock;
+write_retry:
+ iolock = XFS_IOLOCK_EXCL;
xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
@@ -625,7 +647,6 @@ xfs_file_buffered_aio_write(
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
-write_retry:
trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
if (likely(ret >= 0))
@@ -641,26 +662,31 @@ write_retry:
* running at the same time.
*/
if (ret == -EDQUOT && !enospc) {
+ xfs_iunlock(ip, iolock);
enospc = xfs_inode_free_quota_eofblocks(ip);
if (enospc)
goto write_retry;
enospc = xfs_inode_free_quota_cowblocks(ip);
if (enospc)
goto write_retry;
+ iolock = 0;
} else if (ret == -ENOSPC && !enospc) {
struct xfs_eofblocks eofb = {0};
enospc = 1;
xfs_flush_inodes(ip->i_mount);
- eofb.eof_scan_owner = ip->i_ino; /* for locking */
+
+ xfs_iunlock(ip, iolock);
eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+ xfs_icache_free_cowblocks(ip->i_mount, &eofb);
goto write_retry;
}
current->backing_dev_info = NULL;
out:
- xfs_iunlock(ip, iolock);
+ if (iolock)
+ xfs_iunlock(ip, iolock);
return ret;
}
@@ -748,7 +774,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
@@ -770,7 +796,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
@@ -886,6 +912,7 @@ xfs_file_open(
return -EFBIG;
if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
return -EIO;
+ file->f_mode |= FMODE_AIO_NOWAIT;
return 0;
}
@@ -908,9 +935,9 @@ xfs_dir_open(
*/
mode = xfs_ilock_data_map_shared(ip);
if (ip->i_d.di_nextents > 0)
- xfs_dir3_data_readahead(ip, 0, -1);
+ error = xfs_dir3_data_readahead(ip, 0, -1);
xfs_iunlock(ip, mode);
- return 0;
+ return error;
}
STATIC int
@@ -944,395 +971,7 @@ xfs_file_readdir(
*/
bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
- return xfs_readdir(ip, ctx, bufsize);
-}
-
-/*
- * This type is designed to indicate the type of offset we would like
- * to search from page cache for xfs_seek_hole_data().
- */
-enum {
- HOLE_OFF = 0,
- DATA_OFF,
-};
-
-/*
- * Lookup the desired type of offset from the given page.
- *
- * On success, return true and the offset argument will point to the
- * start of the region that was found. Otherwise this function will
- * return false and keep the offset argument unchanged.
- */
-STATIC bool
-xfs_lookup_buffer_offset(
- struct page *page,
- loff_t *offset,
- unsigned int type)
-{
- loff_t lastoff = page_offset(page);
- bool found = false;
- struct buffer_head *bh, *head;
-
- bh = head = page_buffers(page);
- do {
- /*
- * Unwritten extents that have data in the page
- * cache covering them can be identified by the
- * BH_Unwritten state flag. Pages with multiple
- * buffers might have a mix of holes, data and
- * unwritten extents - any buffer with valid
- * data in it should have BH_Uptodate flag set
- * on it.
- */
- if (buffer_unwritten(bh) ||
- buffer_uptodate(bh)) {
- if (type == DATA_OFF)
- found = true;
- } else {
- if (type == HOLE_OFF)
- found = true;
- }
-
- if (found) {
- *offset = lastoff;
- break;
- }
- lastoff += bh->b_size;
- } while ((bh = bh->b_this_page) != head);
-
- return found;
-}
-
-/*
- * This routine is called to find out and return a data or hole offset
- * from the page cache for unwritten extents according to the desired
- * type for xfs_seek_hole_data().
- *
- * The argument offset is used to tell where we start to search from the
- * page cache. Map is used to figure out the end points of the range to
- * lookup pages.
- *
- * Return true if the desired type of offset was found, and the argument
- * offset is filled with that address. Otherwise, return false and keep
- * offset unchanged.
- */
-STATIC bool
-xfs_find_get_desired_pgoff(
- struct inode *inode,
- struct xfs_bmbt_irec *map,
- unsigned int type,
- loff_t *offset)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct pagevec pvec;
- pgoff_t index;
- pgoff_t end;
- loff_t endoff;
- loff_t startoff = *offset;
- loff_t lastoff = startoff;
- bool found = false;
-
- pagevec_init(&pvec, 0);
-
- index = startoff >> PAGE_SHIFT;
- endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
- end = endoff >> PAGE_SHIFT;
- do {
- int want;
- unsigned nr_pages;
- unsigned int i;
-
- want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
- want);
- /*
- * No page mapped into given range. If we are searching holes
- * and if this is the first time we got into the loop, it means
- * that the given offset is landed in a hole, return it.
- *
- * If we have already stepped through some block buffers to find
- * holes but they all contains data. In this case, the last
- * offset is already updated and pointed to the end of the last
- * mapped page, if it does not reach the endpoint to search,
- * that means there should be a hole between them.
- */
- if (nr_pages == 0) {
- /* Data search found nothing */
- if (type == DATA_OFF)
- break;
-
- ASSERT(type == HOLE_OFF);
- if (lastoff == startoff || lastoff < endoff) {
- found = true;
- *offset = lastoff;
- }
- break;
- }
-
- /*
- * At lease we found one page. If this is the first time we
- * step into the loop, and if the first page index offset is
- * greater than the given search offset, a hole was found.
- */
- if (type == HOLE_OFF && lastoff == startoff &&
- lastoff < page_offset(pvec.pages[0])) {
- found = true;
- break;
- }
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- loff_t b_offset;
-
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL),
- * or even swizzled back from swapper_space to tmpfs
- * file mapping. However, page->index will not change
- * because we have a reference on the page.
- *
- * Searching done if the page index is out of range.
- * If the current offset is not reaches the end of
- * the specified search range, there should be a hole
- * between them.
- */
- if (page->index > end) {
- if (type == HOLE_OFF && lastoff < endoff) {
- *offset = lastoff;
- found = true;
- }
- goto out;
- }
-
- lock_page(page);
- /*
- * Page truncated or invalidated(page->mapping == NULL).
- * We can freely skip it and proceed to check the next
- * page.
- */
- if (unlikely(page->mapping != inode->i_mapping)) {
- unlock_page(page);
- continue;
- }
-
- if (!page_has_buffers(page)) {
- unlock_page(page);
- continue;
- }
-
- found = xfs_lookup_buffer_offset(page, &b_offset, type);
- if (found) {
- /*
- * The found offset may be less than the start
- * point to search if this is the first time to
- * come here.
- */
- *offset = max_t(loff_t, startoff, b_offset);
- unlock_page(page);
- goto out;
- }
-
- /*
- * We either searching data but nothing was found, or
- * searching hole but found a data buffer. In either
- * case, probably the next page contains the desired
- * things, update the last offset to it so.
- */
- lastoff = page_offset(page) + PAGE_SIZE;
- unlock_page(page);
- }
-
- /*
- * The number of returned pages less than our desired, search
- * done. In this case, nothing was found for searching data,
- * but we found a hole behind the last offset.
- */
- if (nr_pages < want) {
- if (type == HOLE_OFF) {
- *offset = lastoff;
- found = true;
- }
- break;
- }
-
- index = pvec.pages[i - 1]->index + 1;
- pagevec_release(&pvec);
- } while (index <= end);
-
-out:
- pagevec_release(&pvec);
- return found;
-}
-
-/*
- * caller must lock inode with xfs_ilock_data_map_shared,
- * can we craft an appropriate ASSERT?
- *
- * end is because the VFS-level lseek interface is defined such that any
- * offset past i_size shall return -ENXIO, but we use this for quota code
- * which does not maintain i_size, and we want to SEEK_DATA past i_size.
- */
-loff_t
-__xfs_seek_hole_data(
- struct inode *inode,
- loff_t start,
- loff_t end,
- int whence)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- loff_t uninitialized_var(offset);
- xfs_fileoff_t fsbno;
- xfs_filblks_t lastbno;
- int error;
-
- if (start >= end) {
- error = -ENXIO;
- goto out_error;
- }
-
- /*
- * Try to read extents from the first block indicated
- * by fsbno to the end block of the file.
- */
- fsbno = XFS_B_TO_FSBT(mp, start);
- lastbno = XFS_B_TO_FSB(mp, end);
-
- for (;;) {
- struct xfs_bmbt_irec map[2];
- int nmap = 2;
- unsigned int i;
-
- error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
- XFS_BMAPI_ENTIRE);
- if (error)
- goto out_error;
-
- /* No extents at given offset, must be beyond EOF */
- if (nmap == 0) {
- error = -ENXIO;
- goto out_error;
- }
-
- for (i = 0; i < nmap; i++) {
- offset = max_t(loff_t, start,
- XFS_FSB_TO_B(mp, map[i].br_startoff));
-
- /* Landed in the hole we wanted? */
- if (whence == SEEK_HOLE &&
- map[i].br_startblock == HOLESTARTBLOCK)
- goto out;
-
- /* Landed in the data extent we wanted? */
- if (whence == SEEK_DATA &&
- (map[i].br_startblock == DELAYSTARTBLOCK ||
- (map[i].br_state == XFS_EXT_NORM &&
- !isnullstartblock(map[i].br_startblock))))
- goto out;
-
- /*
- * Landed in an unwritten extent, try to search
- * for hole or data from page cache.
- */
- if (map[i].br_state == XFS_EXT_UNWRITTEN) {
- if (xfs_find_get_desired_pgoff(inode, &map[i],
- whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
- &offset))
- goto out;
- }
- }
-
- /*
- * We only received one extent out of the two requested. This
- * means we've hit EOF and didn't find what we are looking for.
- */
- if (nmap == 1) {
- /*
- * If we were looking for a hole, set offset to
- * the end of the file (i.e., there is an implicit
- * hole at the end of any file).
- */
- if (whence == SEEK_HOLE) {
- offset = end;
- break;
- }
- /*
- * If we were looking for data, it's nowhere to be found
- */
- ASSERT(whence == SEEK_DATA);
- error = -ENXIO;
- goto out_error;
- }
-
- ASSERT(i > 1);
-
- /*
- * Nothing was found, proceed to the next round of search
- * if the next reading offset is not at or beyond EOF.
- */
- fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
- start = XFS_FSB_TO_B(mp, fsbno);
- if (start >= end) {
- if (whence == SEEK_HOLE) {
- offset = end;
- break;
- }
- ASSERT(whence == SEEK_DATA);
- error = -ENXIO;
- goto out_error;
- }
- }
-
-out:
- /*
- * If at this point we have found the hole we wanted, the returned
- * offset may be bigger than the file size as it may be aligned to
- * page boundary for unwritten extents. We need to deal with this
- * situation in particular.
- */
- if (whence == SEEK_HOLE)
- offset = min_t(loff_t, offset, end);
-
- return offset;
-
-out_error:
- return error;
-}
-
-STATIC loff_t
-xfs_seek_hole_data(
- struct file *file,
- loff_t start,
- int whence)
-{
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- uint lock;
- loff_t offset, end;
- int error = 0;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- lock = xfs_ilock_data_map_shared(ip);
-
- end = i_size_read(inode);
- offset = __xfs_seek_hole_data(inode, start, end, whence);
- if (offset < 0) {
- error = offset;
- goto out_unlock;
- }
-
- offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-
-out_unlock:
- xfs_iunlock(ip, lock);
-
- if (error)
- return error;
- return offset;
+ return xfs_readdir(NULL, ip, ctx, bufsize);
}
STATIC loff_t
@@ -1341,17 +980,25 @@ xfs_file_llseek(
loff_t offset,
int whence)
{
+ struct inode *inode = file->f_mapping->host;
+
+ if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
+ return -EIO;
+
switch (whence) {
- case SEEK_END:
- case SEEK_CUR:
- case SEEK_SET:
+ default:
return generic_file_llseek(file, offset, whence);
case SEEK_HOLE:
+ offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
+ break;
case SEEK_DATA:
- return xfs_seek_hole_data(file, offset, whence);
- default:
- return -EINVAL;
+ offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
+ break;
}
+
+ if (offset < 0)
+ return offset;
+ return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
/*
@@ -1373,22 +1020,21 @@ xfs_file_llseek(
*/
STATIC int
xfs_filemap_page_mkwrite(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int ret;
trace_xfs_filemap_page_mkwrite(XFS_I(inode));
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
} else {
- ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
+ ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
}
@@ -1400,23 +1046,22 @@ xfs_filemap_page_mkwrite(
STATIC int
xfs_filemap_fault(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int ret;
trace_xfs_filemap_fault(XFS_I(inode));
/* DAX can shortcut the normal fault path on write faults! */
if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
- return xfs_filemap_page_mkwrite(vma, vmf);
+ return xfs_filemap_page_mkwrite(vmf);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode))
- ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
else
- ret = filemap_fault(vma, vmf);
+ ret = filemap_fault(vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
return ret;
@@ -1425,36 +1070,34 @@ xfs_filemap_fault(
/*
* Similar to xfs_filemap_fault(), the DAX fault path can call into here on
* both read and write faults. Hence we need to handle both cases. There is no
- * ->pmd_mkwrite callout for huge pages, so we have a single function here to
+ * ->huge_mkwrite callout for huge pages, so we have a single function here to
* handle both cases here. @flags carries the information on the type of fault
* occuring.
*/
STATIC int
-xfs_filemap_pmd_fault(
- struct vm_area_struct *vma,
- unsigned long addr,
- pmd_t *pmd,
- unsigned int flags)
+xfs_filemap_huge_fault(
+ struct vm_fault *vmf,
+ enum page_entry_size pe_size)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
int ret;
if (!IS_DAX(inode))
return VM_FAULT_FALLBACK;
- trace_xfs_filemap_pmd_fault(ip);
+ trace_xfs_filemap_huge_fault(ip);
- if (flags & FAULT_FLAG_WRITE) {
+ if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- if (flags & FAULT_FLAG_WRITE)
+ if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(inode->i_sb);
return ret;
@@ -1468,11 +1111,10 @@ xfs_filemap_pmd_fault(
*/
static int
xfs_filemap_pfn_mkwrite(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
int ret = VM_FAULT_NOPAGE;
loff_t size;
@@ -1480,7 +1122,7 @@ xfs_filemap_pfn_mkwrite(
trace_xfs_filemap_pfn_mkwrite(ip);
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
/* check if the faulting page hasn't raced with truncate */
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
@@ -1488,7 +1130,7 @@ xfs_filemap_pfn_mkwrite(
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else if (IS_DAX(inode))
- ret = dax_pfn_mkwrite(vma, vmf);
+ ret = dax_pfn_mkwrite(vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
@@ -1497,7 +1139,7 @@ xfs_filemap_pfn_mkwrite(
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = xfs_filemap_fault,
- .pmd_fault = xfs_filemap_pmd_fault,
+ .huge_fault = xfs_filemap_huge_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_filemap_page_mkwrite,
.pfn_mkwrite = xfs_filemap_pfn_mkwrite,
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
new file mode 100644
index 000000000000..814ed729881d
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.c
@@ -0,0 +1,943 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_error.h"
+#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_log.h"
+#include "xfs_rmap.h"
+#include "xfs_alloc.h"
+#include "xfs_bit.h"
+#include <linux/fsmap.h>
+#include "xfs_fsmap.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_rtalloc.h"
+
+/* Convert an xfs_fsmap to an fsmap. */
+void
+xfs_fsmap_from_internal(
+ struct fsmap *dest,
+ struct xfs_fsmap *src)
+{
+ dest->fmr_device = src->fmr_device;
+ dest->fmr_flags = src->fmr_flags;
+ dest->fmr_physical = BBTOB(src->fmr_physical);
+ dest->fmr_owner = src->fmr_owner;
+ dest->fmr_offset = BBTOB(src->fmr_offset);
+ dest->fmr_length = BBTOB(src->fmr_length);
+ dest->fmr_reserved[0] = 0;
+ dest->fmr_reserved[1] = 0;
+ dest->fmr_reserved[2] = 0;
+}
+
+/* Convert an fsmap to an xfs_fsmap. */
+void
+xfs_fsmap_to_internal(
+ struct xfs_fsmap *dest,
+ struct fsmap *src)
+{
+ dest->fmr_device = src->fmr_device;
+ dest->fmr_flags = src->fmr_flags;
+ dest->fmr_physical = BTOBBT(src->fmr_physical);
+ dest->fmr_owner = src->fmr_owner;
+ dest->fmr_offset = BTOBBT(src->fmr_offset);
+ dest->fmr_length = BTOBBT(src->fmr_length);
+}
+
+/* Convert an fsmap owner into an rmapbt owner. */
+static int
+xfs_fsmap_owner_to_rmap(
+ struct xfs_rmap_irec *dest,
+ struct xfs_fsmap *src)
+{
+ if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
+ dest->rm_owner = src->fmr_owner;
+ return 0;
+ }
+
+ switch (src->fmr_owner) {
+ case 0: /* "lowest owner id possible" */
+ case -1ULL: /* "highest owner id possible" */
+ dest->rm_owner = 0;
+ break;
+ case XFS_FMR_OWN_FREE:
+ dest->rm_owner = XFS_RMAP_OWN_NULL;
+ break;
+ case XFS_FMR_OWN_UNKNOWN:
+ dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
+ break;
+ case XFS_FMR_OWN_FS:
+ dest->rm_owner = XFS_RMAP_OWN_FS;
+ break;
+ case XFS_FMR_OWN_LOG:
+ dest->rm_owner = XFS_RMAP_OWN_LOG;
+ break;
+ case XFS_FMR_OWN_AG:
+ dest->rm_owner = XFS_RMAP_OWN_AG;
+ break;
+ case XFS_FMR_OWN_INOBT:
+ dest->rm_owner = XFS_RMAP_OWN_INOBT;
+ break;
+ case XFS_FMR_OWN_INODES:
+ dest->rm_owner = XFS_RMAP_OWN_INODES;
+ break;
+ case XFS_FMR_OWN_REFC:
+ dest->rm_owner = XFS_RMAP_OWN_REFC;
+ break;
+ case XFS_FMR_OWN_COW:
+ dest->rm_owner = XFS_RMAP_OWN_COW;
+ break;
+ case XFS_FMR_OWN_DEFECTIVE: /* not implemented */
+ /* fall through */
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Convert an rmapbt owner into an fsmap owner. */
+static int
+xfs_fsmap_owner_from_rmap(
+ struct xfs_fsmap *dest,
+ struct xfs_rmap_irec *src)
+{
+ dest->fmr_flags = 0;
+ if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
+ dest->fmr_owner = src->rm_owner;
+ return 0;
+ }
+ dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
+
+ switch (src->rm_owner) {
+ case XFS_RMAP_OWN_FS:
+ dest->fmr_owner = XFS_FMR_OWN_FS;
+ break;
+ case XFS_RMAP_OWN_LOG:
+ dest->fmr_owner = XFS_FMR_OWN_LOG;
+ break;
+ case XFS_RMAP_OWN_AG:
+ dest->fmr_owner = XFS_FMR_OWN_AG;
+ break;
+ case XFS_RMAP_OWN_INOBT:
+ dest->fmr_owner = XFS_FMR_OWN_INOBT;
+ break;
+ case XFS_RMAP_OWN_INODES:
+ dest->fmr_owner = XFS_FMR_OWN_INODES;
+ break;
+ case XFS_RMAP_OWN_REFC:
+ dest->fmr_owner = XFS_FMR_OWN_REFC;
+ break;
+ case XFS_RMAP_OWN_COW:
+ dest->fmr_owner = XFS_FMR_OWN_COW;
+ break;
+ case XFS_RMAP_OWN_NULL: /* "free" */
+ dest->fmr_owner = XFS_FMR_OWN_FREE;
+ break;
+ default:
+ return -EFSCORRUPTED;
+ }
+ return 0;
+}
+
+/* getfsmap query state */
+struct xfs_getfsmap_info {
+ struct xfs_fsmap_head *head;
+ xfs_fsmap_format_t formatter; /* formatting fn */
+ void *format_arg; /* format buffer */
+ struct xfs_buf *agf_bp; /* AGF, for refcount queries */
+ xfs_daddr_t next_daddr; /* next daddr we expect */
+ u64 missing_owner; /* owner of holes */
+ u32 dev; /* device id */
+ xfs_agnumber_t agno; /* AG number, if applicable */
+ struct xfs_rmap_irec low; /* low rmap key */
+ struct xfs_rmap_irec high; /* high rmap key */
+ bool last; /* last extent? */
+};
+
+/* Associate a device with a getfsmap handler. */
+struct xfs_getfsmap_dev {
+ u32 dev;
+ int (*fn)(struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info);
+};
+
+/* Compare two getfsmap device handlers. */
+static int
+xfs_getfsmap_dev_compare(
+ const void *p1,
+ const void *p2)
+{
+ const struct xfs_getfsmap_dev *d1 = p1;
+ const struct xfs_getfsmap_dev *d2 = p2;
+
+ return d1->dev - d2->dev;
+}
+
+/* Decide if this mapping is shared. */
+STATIC int
+xfs_getfsmap_is_shared(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_rmap_irec *rec,
+ bool *stat)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *cur;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ int error;
+
+ *stat = false;
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+ /* rt files will have agno set to NULLAGNUMBER */
+ if (info->agno == NULLAGNUMBER)
+ return 0;
+
+ /* Are there any shared blocks here? */
+ flen = 0;
+ cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
+ info->agno, NULL);
+
+ error = xfs_refcount_find_shared(cur, rec->rm_startblock,
+ rec->rm_blockcount, &fbno, &flen, false);
+
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ if (error)
+ return error;
+
+ *stat = flen > 0;
+ return 0;
+}
+
+/*
+ * Format a reverse mapping for getfsmap, having translated rm_startblock
+ * into the appropriate daddr units.
+ */
+STATIC int
+xfs_getfsmap_helper(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_rmap_irec *rec,
+ xfs_daddr_t rec_daddr)
+{
+ struct xfs_fsmap fmr;
+ struct xfs_mount *mp = tp->t_mountp;
+ bool shared;
+ int error;
+
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ /*
+ * Filter out records that start before our startpoint, if the
+ * caller requested that.
+ */
+ if (xfs_rmap_compare(rec, &info->low) < 0) {
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ }
+
+ /* Are we just counting mappings? */
+ if (info->head->fmh_count == 0) {
+ if (rec_daddr > info->next_daddr)
+ info->head->fmh_entries++;
+
+ if (info->last)
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+
+ info->head->fmh_entries++;
+
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ }
+
+ /*
+ * If the record starts past the last physical block we saw,
+ * then we've found a gap. Report the gap as being owned by
+ * whatever the caller specified is the missing owner.
+ */
+ if (rec_daddr > info->next_daddr) {
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = info->next_daddr;
+ fmr.fmr_owner = info->missing_owner;
+ fmr.fmr_offset = 0;
+ fmr.fmr_length = rec_daddr - info->next_daddr;
+ fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+ }
+
+ if (info->last)
+ goto out;
+
+ /* Fill out the extent we found */
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+
+ trace_xfs_fsmap_mapping(mp, info->dev, info->agno, rec);
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = rec_daddr;
+ error = xfs_fsmap_owner_from_rmap(&fmr, rec);
+ if (error)
+ return error;
+ fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
+ fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
+ fmr.fmr_flags |= FMR_OF_PREALLOC;
+ if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+ fmr.fmr_flags |= FMR_OF_ATTR_FORK;
+ if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
+ if (fmr.fmr_flags == 0) {
+ error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
+ if (error)
+ return error;
+ if (shared)
+ fmr.fmr_flags |= FMR_OF_SHARED;
+ }
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+
+out:
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+}
+
+/* Transform a rmapbt irec into a fsmap */
+STATIC int
+xfs_getfsmap_datadev_helper(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_getfsmap_info *info = priv;
+ xfs_fsblock_t fsb;
+ xfs_daddr_t rec_daddr;
+
+ fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->rm_startblock);
+ rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
+
+ return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
+}
+
+/* Transform a rtbitmap "record" into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+ struct xfs_trans *tp,
+ struct xfs_rtalloc_rec *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_getfsmap_info *info = priv;
+ struct xfs_rmap_irec irec;
+ xfs_daddr_t rec_daddr;
+
+ rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
+
+ irec.rm_startblock = rec->ar_startblock;
+ irec.rm_blockcount = rec->ar_blockcount;
+ irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
+ irec.rm_offset = 0;
+ irec.rm_flags = 0;
+
+ return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+}
+
+/* Transform a bnobt irec into a fsmap */
+STATIC int
+xfs_getfsmap_datadev_bnobt_helper(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_getfsmap_info *info = priv;
+ struct xfs_rmap_irec irec;
+ xfs_daddr_t rec_daddr;
+
+ rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno,
+ rec->ar_startblock);
+
+ irec.rm_startblock = rec->ar_startblock;
+ irec.rm_blockcount = rec->ar_blockcount;
+ irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
+ irec.rm_offset = 0;
+ irec.rm_flags = 0;
+
+ return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
+}
+
+/* Set rmap flags based on the getfsmap flags */
+static void
+xfs_getfsmap_set_irec_flags(
+ struct xfs_rmap_irec *irec,
+ struct xfs_fsmap *fmr)
+{
+ irec->rm_flags = 0;
+ if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
+ irec->rm_flags |= XFS_RMAP_ATTR_FORK;
+ if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
+ irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
+ if (fmr->fmr_flags & FMR_OF_PREALLOC)
+ irec->rm_flags |= XFS_RMAP_UNWRITTEN;
+}
+
+/* Execute a getfsmap query against the log device. */
+STATIC int
+xfs_getfsmap_logdev(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_rmap_irec rmap;
+ int error;
+
+ /* Set up search keys */
+ info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(&info->low, keys);
+ if (error)
+ return error;
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+
+ error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1);
+ if (error)
+ return error;
+ info->high.rm_startblock = -1U;
+ info->high.rm_owner = ULLONG_MAX;
+ info->high.rm_offset = ULLONG_MAX;
+ info->high.rm_blockcount = 0;
+ info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+ info->missing_owner = XFS_FMR_OWN_FREE;
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low);
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high);
+
+ if (keys[0].fmr_physical > 0)
+ return 0;
+
+ /* Fabricate an rmap entry for the external log device. */
+ rmap.rm_startblock = 0;
+ rmap.rm_blockcount = mp->m_sb.sb_logblocks;
+ rmap.rm_owner = XFS_RMAP_OWN_LOG;
+ rmap.rm_offset = 0;
+ rmap.rm_flags = 0;
+
+ return xfs_getfsmap_helper(tp, info, &rmap, 0);
+}
+
+/* Execute a getfsmap query against the realtime device. */
+STATIC int
+__xfs_getfsmap_rtdev(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ int (*query_fn)(struct xfs_trans *,
+ struct xfs_getfsmap_info *),
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_fsblock_t start_fsb;
+ xfs_fsblock_t end_fsb;
+ xfs_daddr_t eofs;
+ int error = 0;
+
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+ if (keys[0].fmr_physical >= eofs)
+ return 0;
+ if (keys[1].fmr_physical >= eofs)
+ keys[1].fmr_physical = eofs - 1;
+ start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
+ end_fsb = XFS_BB_TO_FSB(mp, keys[1].fmr_physical);
+
+ /* Set up search keys */
+ info->low.rm_startblock = start_fsb;
+ error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
+ if (error)
+ return error;
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+
+ info->high.rm_startblock = end_fsb;
+ error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
+ if (error)
+ return error;
+ info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
+ info->high.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low);
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high);
+
+ return query_fn(tp, info);
+}
+
+/* Actually query the realtime bitmap. */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_query(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_rtalloc_rec alow;
+ struct xfs_rtalloc_rec ahigh;
+ int error;
+
+ xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
+
+ alow.ar_startblock = info->low.rm_startblock;
+ ahigh.ar_startblock = info->high.rm_startblock;
+ error = xfs_rtalloc_query_range(tp, &alow, &ahigh,
+ xfs_getfsmap_rtdev_rtbitmap_helper, info);
+ if (error)
+ goto err;
+
+ /* Report any gaps at the end of the rtbitmap */
+ info->last = true;
+ error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, &ahigh, info);
+ if (error)
+ goto err;
+err:
+ xfs_iunlock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
+ return error;
+}
+
+/* Execute a getfsmap query against the realtime device rtbitmap. */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ info->missing_owner = XFS_FMR_OWN_UNKNOWN;
+ return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
+ info);
+}
+
+/* Execute a getfsmap query against the regular data device. */
+STATIC int
+__xfs_getfsmap_datadev(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info,
+ int (*query_fn)(struct xfs_trans *,
+ struct xfs_getfsmap_info *,
+ struct xfs_btree_cur **,
+ void *),
+ void *priv)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *bt_cur = NULL;
+ xfs_fsblock_t start_fsb;
+ xfs_fsblock_t end_fsb;
+ xfs_agnumber_t start_ag;
+ xfs_agnumber_t end_ag;
+ xfs_daddr_t eofs;
+ int error = 0;
+
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (keys[0].fmr_physical >= eofs)
+ return 0;
+ if (keys[1].fmr_physical >= eofs)
+ keys[1].fmr_physical = eofs - 1;
+ start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
+ end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical);
+
+ /*
+ * Convert the fsmap low/high keys to AG based keys. Initialize
+ * low to the fsmap low key and max out the high key to the end
+ * of the AG.
+ */
+ info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
+ if (error)
+ return error;
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+
+ info->high.rm_startblock = -1U;
+ info->high.rm_owner = ULLONG_MAX;
+ info->high.rm_offset = ULLONG_MAX;
+ info->high.rm_blockcount = 0;
+ info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+
+ start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
+ end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
+
+ /* Query each AG */
+ for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
+ /*
+ * Set the AG high key from the fsmap high key if this
+ * is the last AG that we're querying.
+ */
+ if (info->agno == end_ag) {
+ info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
+ end_fsb);
+ info->high.rm_offset = XFS_BB_TO_FSBT(mp,
+ keys[1].fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
+ if (error)
+ goto err;
+ xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
+ }
+
+ if (bt_cur) {
+ xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+ bt_cur = NULL;
+ xfs_trans_brelse(tp, info->agf_bp);
+ info->agf_bp = NULL;
+ }
+
+ error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
+ &info->agf_bp);
+ if (error)
+ goto err;
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low);
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+ &info->high);
+
+ error = query_fn(tp, info, &bt_cur, priv);
+ if (error)
+ goto err;
+
+ /*
+ * Set the AG low key to the start of the AG prior to
+ * moving on to the next AG.
+ */
+ if (info->agno == start_ag) {
+ info->low.rm_startblock = 0;
+ info->low.rm_owner = 0;
+ info->low.rm_offset = 0;
+ info->low.rm_flags = 0;
+ }
+ }
+
+ /* Report any gap at the end of the AG */
+ info->last = true;
+ error = query_fn(tp, info, &bt_cur, priv);
+ if (error)
+ goto err;
+
+err:
+ if (bt_cur)
+ xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
+ XFS_BTREE_NOERROR);
+ if (info->agf_bp) {
+ xfs_trans_brelse(tp, info->agf_bp);
+ info->agf_bp = NULL;
+ }
+
+ return error;
+}
+
+/* Actually query the rmap btree. */
+STATIC int
+xfs_getfsmap_datadev_rmapbt_query(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_btree_cur **curpp,
+ void *priv)
+{
+ /* Report any gap at the end of the last AG. */
+ if (info->last)
+ return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);
+
+ /* Allocate cursor for this AG and query_range it. */
+ *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
+ info->agno);
+ return xfs_rmap_query_range(*curpp, &info->low, &info->high,
+ xfs_getfsmap_datadev_helper, info);
+}
+
+/* Execute a getfsmap query against the regular data device rmapbt. */
+STATIC int
+xfs_getfsmap_datadev_rmapbt(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ info->missing_owner = XFS_FMR_OWN_FREE;
+ return __xfs_getfsmap_datadev(tp, keys, info,
+ xfs_getfsmap_datadev_rmapbt_query, NULL);
+}
+
+/* Actually query the bno btree. */
+STATIC int
+xfs_getfsmap_datadev_bnobt_query(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_btree_cur **curpp,
+ void *priv)
+{
+ struct xfs_alloc_rec_incore *key = priv;
+
+ /* Report any gap at the end of the last AG. */
+ if (info->last)
+ return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);
+
+ /* Allocate cursor for this AG and query_range it. */
+ *curpp = xfs_allocbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
+ info->agno, XFS_BTNUM_BNO);
+ key->ar_startblock = info->low.rm_startblock;
+ key[1].ar_startblock = info->high.rm_startblock;
+ return xfs_alloc_query_range(*curpp, key, &key[1],
+ xfs_getfsmap_datadev_bnobt_helper, info);
+}
+
+/* Execute a getfsmap query against the regular data device's bnobt. */
+STATIC int
+xfs_getfsmap_datadev_bnobt(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_alloc_rec_incore akeys[2];
+
+ info->missing_owner = XFS_FMR_OWN_UNKNOWN;
+ return __xfs_getfsmap_datadev(tp, keys, info,
+ xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
+}
+
+/* Do we recognize the device? */
+STATIC bool
+xfs_getfsmap_is_valid_device(
+ struct xfs_mount *mp,
+ struct xfs_fsmap *fm)
+{
+ if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
+ fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
+ return true;
+ if (mp->m_logdev_targp &&
+ fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
+ return true;
+ if (mp->m_rtdev_targp &&
+ fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
+ return true;
+ return false;
+}
+
+/* Ensure that the low key is less than the high key. */
+STATIC bool
+xfs_getfsmap_check_keys(
+ struct xfs_fsmap *low_key,
+ struct xfs_fsmap *high_key)
+{
+ if (low_key->fmr_device > high_key->fmr_device)
+ return false;
+ if (low_key->fmr_device < high_key->fmr_device)
+ return true;
+
+ if (low_key->fmr_physical > high_key->fmr_physical)
+ return false;
+ if (low_key->fmr_physical < high_key->fmr_physical)
+ return true;
+
+ if (low_key->fmr_owner > high_key->fmr_owner)
+ return false;
+ if (low_key->fmr_owner < high_key->fmr_owner)
+ return true;
+
+ if (low_key->fmr_offset > high_key->fmr_offset)
+ return false;
+ if (low_key->fmr_offset < high_key->fmr_offset)
+ return true;
+
+ return false;
+}
+
+#define XFS_GETFSMAP_DEVS 3
+/*
+ * Get filesystem's extents as described in head, and format for
+ * output. Calls formatter to fill the user's buffer until all
+ * extents are mapped, until the passed-in head->fmh_count slots have
+ * been filled, or until the formatter short-circuits the loop, if it
+ * is tracking filled-in extents on its own.
+ *
+ * Key to Confusion
+ * ----------------
+ * There are multiple levels of keys and counters at work here:
+ * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in;
+ * these reflect fs-wide sector addrs.
+ * dkeys -- fmh_keys used to query each device;
+ * these are fmh_keys but w/ the low key
+ * bumped up by fmr_length.
+ * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
+ * is how we detect gaps in the fsmap
+ records and report them.
+ * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from
+ * dkeys; used to query the metadata.
+ */
+int
+xfs_getfsmap(
+ struct xfs_mount *mp,
+ struct xfs_fsmap_head *head,
+ xfs_fsmap_format_t formatter,
+ void *arg)
+{
+ struct xfs_trans *tp = NULL;
+ struct xfs_fsmap dkeys[2]; /* per-dev keys */
+ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
+ struct xfs_getfsmap_info info = { NULL };
+ bool use_rmap;
+ int i;
+ int error = 0;
+
+ if (head->fmh_iflags & ~FMH_IF_VALID)
+ return -EINVAL;
+ if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
+ !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
+ return -EINVAL;
+
+ use_rmap = capable(CAP_SYS_ADMIN) &&
+ xfs_sb_version_hasrmapbt(&mp->m_sb);
+ head->fmh_entries = 0;
+
+ /* Set up our device handlers. */
+ memset(handlers, 0, sizeof(handlers));
+ handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
+ if (use_rmap)
+ handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
+ else
+ handlers[0].fn = xfs_getfsmap_datadev_bnobt;
+ if (mp->m_logdev_targp != mp->m_ddev_targp) {
+ handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
+ handlers[1].fn = xfs_getfsmap_logdev;
+ }
+ if (mp->m_rtdev_targp) {
+ handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
+ handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
+ }
+
+ xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
+ xfs_getfsmap_dev_compare);
+
+ /*
+ * To continue where we left off, we allow userspace to use the
+ * last mapping from a previous call as the low key of the next.
+ * This is identified by a non-zero length in the low key. We
+ * have to increment the low key in this scenario to ensure we
+ * don't return the same mapping again, and instead return the
+ * very next mapping.
+ *
+ * If the low key mapping refers to file data, the same physical
+ * blocks could be mapped to several other files/offsets.
+ * According to rmapbt record ordering, the minimal next
+ * possible record for the block range is the next starting
+ * offset in the same inode. Therefore, bump the file offset to
+ * continue the search appropriately. For all other low key
+ * mapping types (attr blocks, metadata), bump the physical
+ * offset as there can be no other mapping for the same physical
+ * block range.
+ */
+ dkeys[0] = head->fmh_keys[0];
+ if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
+ dkeys[0].fmr_physical += dkeys[0].fmr_length;
+ dkeys[0].fmr_owner = 0;
+ if (dkeys[0].fmr_offset)
+ return -EINVAL;
+ } else
+ dkeys[0].fmr_offset += dkeys[0].fmr_length;
+ dkeys[0].fmr_length = 0;
+ memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
+
+ if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
+ return -EINVAL;
+
+ info.next_daddr = head->fmh_keys[0].fmr_physical +
+ head->fmh_keys[0].fmr_length;
+ info.formatter = formatter;
+ info.format_arg = arg;
+ info.head = head;
+
+ /* For each device we support... */
+ for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
+ /* Is this device within the range the user asked for? */
+ if (!handlers[i].fn)
+ continue;
+ if (head->fmh_keys[0].fmr_device > handlers[i].dev)
+ continue;
+ if (head->fmh_keys[1].fmr_device < handlers[i].dev)
+ break;
+
+ /*
+ * If this device number matches the high key, we have
+ * to pass the high key to the handler to limit the
+ * query results. If the device number exceeds the
+ * low key, zero out the low key so that we get
+ * everything from the beginning.
+ */
+ if (handlers[i].dev == head->fmh_keys[1].fmr_device)
+ dkeys[1] = head->fmh_keys[1];
+ if (handlers[i].dev > head->fmh_keys[0].fmr_device)
+ memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ break;
+
+ info.dev = handlers[i].dev;
+ info.last = false;
+ info.agno = NULLAGNUMBER;
+ error = handlers[i].fn(tp, dkeys, &info);
+ if (error)
+ break;
+ xfs_trans_cancel(tp);
+ tp = NULL;
+ info.next_daddr = 0;
+ }
+
+ if (tp)
+ xfs_trans_cancel(tp);
+ head->fmh_oflags = FMH_OF_DEV_T;
+ return error;
+}
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
new file mode 100644
index 000000000000..0b9bf822595c
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_FSMAP_H__
+#define __XFS_FSMAP_H__
+
+struct fsmap;
+
+/* internal fsmap representation */
+struct xfs_fsmap {
+ dev_t fmr_device; /* device id */
+ uint32_t fmr_flags; /* mapping flags */
+ uint64_t fmr_physical; /* device offset of segment */
+ uint64_t fmr_owner; /* owner id */
+ xfs_fileoff_t fmr_offset; /* file offset of segment */
+ xfs_filblks_t fmr_length; /* length of segment, blocks */
+};
+
+struct xfs_fsmap_head {
+ uint32_t fmh_iflags; /* control flags */
+ uint32_t fmh_oflags; /* output flags */
+ unsigned int fmh_count; /* # of entries in array incl. input */
+ unsigned int fmh_entries; /* # of entries filled in (output). */
+
+ struct xfs_fsmap fmh_keys[2]; /* low and high keys */
+};
+
+void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
+void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
+
+/* fsmap to userspace formatter - copy to user & advance pointer */
+typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
+
+int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
+ xfs_fsmap_format_t formatter, void *arg);
+
+#endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 242e8091296d..8f22fc579dbb 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -352,12 +352,7 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
- agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
- agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
@@ -381,12 +376,7 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
- agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
- agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
@@ -413,8 +403,8 @@ xfs_growfs_data_private(
goto error0;
}
- xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
- agno, XFS_BTREE_CRC_BLOCKS);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0,
+ agno, 0);
block = XFS_BUF_TO_BLOCK(bp);
@@ -488,12 +478,7 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
- agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
- agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
@@ -513,13 +498,8 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
- 0, 0, agno,
- XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
- 0, agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO,
+ 0, 0, agno, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
@@ -540,9 +520,8 @@ xfs_growfs_data_private(
goto error0;
}
- xfs_btree_init_block(mp, bp, XFS_REFC_CRC_MAGIC,
- 0, 0, agno,
- XFS_BTREE_CRC_BLOCKS);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC,
+ 0, 0, agno, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
@@ -623,7 +602,7 @@ xfs_growfs_data_private(
if (nagimax)
mp->m_maxagi = nagimax;
if (mp->m_sb.sb_imax_pct) {
- __uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
+ uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
do_div(icount, 100);
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
} else
@@ -814,17 +793,17 @@ xfs_fs_counts(
int
xfs_reserve_blocks(
xfs_mount_t *mp,
- __uint64_t *inval,
+ uint64_t *inval,
xfs_fsop_resblks_t *outval)
{
- __int64_t lcounter, delta;
- __int64_t fdblks_delta = 0;
- __uint64_t request;
- __int64_t free;
+ int64_t lcounter, delta;
+ int64_t fdblks_delta = 0;
+ uint64_t request;
+ int64_t free;
int error = 0;
/* If inval is null, report current values and return */
- if (inval == (__uint64_t *)NULL) {
+ if (inval == (uint64_t *)NULL) {
if (!outval)
return -EINVAL;
outval->resblks = mp->m_resblks;
@@ -925,7 +904,7 @@ out:
int
xfs_fs_goingdown(
xfs_mount_t *mp,
- __uint32_t inflags)
+ uint32_t inflags)
{
switch (inflags) {
case XFS_FSOP_GOING_FLAGS_DEFAULT: {
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f34915898fea..2954c13a3acd 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -22,9 +22,9 @@ extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
-extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
+extern int xfs_reserve_blocks(xfs_mount_t *mp, uint64_t *inval,
xfs_fsop_resblks_t *outval);
-extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern int xfs_fs_goingdown(xfs_mount_t *mp, uint32_t inflags);
extern int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
extern int xfs_fs_unreserve_ag_blocks(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 687a4b01fc53..3e1cc3001bcb 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -47,4 +47,9 @@ xfs_param_t xfs_params = {
struct xfs_globals xfs_globals = {
.log_recovery_delay = 0, /* no delay by default */
+#ifdef XFS_ASSERT_FATAL
+ .bug_on_assert = true, /* assert failures BUG() */
+#else
+ .bug_on_assert = false, /* assert failures WARN() */
+#endif
};
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 70ca4f608321..0a9e6985a0d0 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,6 @@ xfs_inode_alloc(
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
@@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag(
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (pag->pag_ici_reclaimable++)
return;
@@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag(
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (--pag->pag_ici_reclaimable)
return;
@@ -262,6 +261,22 @@ xfs_inode_clear_reclaim_tag(
xfs_perag_clear_reclaim_tag(pag);
}
+static void
+xfs_inew_wait(
+ struct xfs_inode *ip)
+{
+ wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT);
+ DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
+
+ do {
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ if (!xfs_iflags_test(ip, XFS_INEW))
+ break;
+ schedule();
+ } while (true);
+ finish_wait(wq, &wait.wq_entry);
+}
+
/*
* When we recycle a reclaimable inode, we need to re-initialise the VFS inode
* part of the structure. This is made more complex by the fact we store
@@ -353,6 +368,11 @@ xfs_iget_cache_hit(
if (ip->i_flags & XFS_IRECLAIMABLE) {
trace_xfs_iget_reclaim(ip);
+ if (flags & XFS_IGET_INCORE) {
+ error = -EAGAIN;
+ goto out_error;
+ }
+
/*
* We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
* from stomping over us while we recycle the inode. We can't
@@ -366,14 +386,17 @@ xfs_iget_cache_hit(
error = xfs_reinit_inode(mp, inode);
if (error) {
+ bool wake;
/*
* Re-initializing the inode failed, and we are in deep
* trouble. Try to re-add it to the reclaim list.
*/
rcu_read_lock();
spin_lock(&ip->i_flags_lock);
-
+ wake = !!__xfs_iflags_test(ip, XFS_INEW);
ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
+ if (wake)
+ wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
trace_xfs_iget_reclaim_fail(ip);
goto out_error;
@@ -414,7 +437,8 @@ xfs_iget_cache_hit(
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
- xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
+ if (!(flags & XFS_IGET_INCORE))
+ xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
XFS_STATS_INC(mp, xs_ig_found);
return 0;
@@ -585,6 +609,10 @@ again:
goto out_error_or_again;
} else {
rcu_read_unlock();
+ if (flags & XFS_IGET_INCORE) {
+ error = -ENOENT;
+ goto out_error_or_again;
+ }
XFS_STATS_INC(mp, xs_ig_missed);
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
@@ -605,7 +633,7 @@ again:
return 0;
out_error_or_again:
- if (error == -EAGAIN) {
+ if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) {
delay(1);
goto again;
}
@@ -614,6 +642,44 @@ out_error_or_again:
}
/*
+ * "Is this a cached inode that's also allocated?"
+ *
+ * Look up an inode by number in the given file system. If the inode is
+ * in cache and isn't in purgatory, return 1 if the inode is allocated
+ * and 0 if it is not. For all other cases (not in cache, being torn
+ * down, etc.), return a negative error code.
+ *
+ * The caller has to prevent inode allocation and freeing activity,
+ * presumably by locking the AGI buffer. This is to ensure that an
+ * inode cannot transition from allocated to freed until the caller is
+ * ready to allow that. If the inode is in an intermediate state (new,
+ * reclaimable, or being reclaimed), -EAGAIN will be returned; if the
+ * inode is not in the cache, -ENOENT will be returned. The caller must
+ * deal with these scenarios appropriately.
+ *
+ * This is a specialized use case for the online scrubber; if you're
+ * reading this, you probably want xfs_iget.
+ */
+int
+xfs_icache_inode_is_allocated(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ bool *inuse)
+{
+ struct xfs_inode *ip;
+ int error;
+
+ error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
+ if (error)
+ return error;
+
+ *inuse = !!(VFS_I(ip)->i_mode);
+ IRELE(ip);
+ return 0;
+}
+
+/*
* The inode lookup is done in batches to keep the amount of lock traffic and
* radix tree lookups to a minimum. The batch size is a trade off between
* lookup reduction and stack usage. This is in the reclaim path, so we can't
@@ -623,9 +689,11 @@ out_error_or_again:
STATIC int
xfs_inode_ag_walk_grab(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ int flags)
{
struct inode *inode = VFS_I(ip);
+ bool newinos = !!(flags & XFS_AGITER_INEW_WAIT);
ASSERT(rcu_read_lock_held());
@@ -643,7 +711,8 @@ xfs_inode_ag_walk_grab(
goto out_unlock_noent;
/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
- if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+ if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) ||
+ __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM))
goto out_unlock_noent;
spin_unlock(&ip->i_flags_lock);
@@ -671,7 +740,8 @@ xfs_inode_ag_walk(
void *args),
int flags,
void *args,
- int tag)
+ int tag,
+ int iter_flags)
{
uint32_t first_index;
int last_error = 0;
@@ -713,7 +783,7 @@ restart:
for (i = 0; i < nr_found; i++) {
struct xfs_inode *ip = batch[i];
- if (done || xfs_inode_ag_walk_grab(ip))
+ if (done || xfs_inode_ag_walk_grab(ip, iter_flags))
batch[i] = NULL;
/*
@@ -741,6 +811,9 @@ restart:
for (i = 0; i < nr_found; i++) {
if (!batch[i])
continue;
+ if ((iter_flags & XFS_AGITER_INEW_WAIT) &&
+ xfs_iflags_test(batch[i], XFS_INEW))
+ xfs_inew_wait(batch[i]);
error = execute(batch[i], flags, args);
IRELE(batch[i]);
if (error == -EAGAIN) {
@@ -820,12 +893,13 @@ xfs_cowblocks_worker(
}
int
-xfs_inode_ag_iterator(
+xfs_inode_ag_iterator_flags(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags,
void *args),
int flags,
- void *args)
+ void *args,
+ int iter_flags)
{
struct xfs_perag *pag;
int error = 0;
@@ -835,7 +909,8 @@ xfs_inode_ag_iterator(
ag = 0;
while ((pag = xfs_perag_get(mp, ag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1);
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1,
+ iter_flags);
xfs_perag_put(pag);
if (error) {
last_error = error;
@@ -847,6 +922,17 @@ xfs_inode_ag_iterator(
}
int
+xfs_inode_ag_iterator(
+ struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, int flags,
+ void *args),
+ int flags,
+ void *args)
+{
+ return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0);
+}
+
+int
xfs_inode_ag_iterator_tag(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags,
@@ -863,7 +949,8 @@ xfs_inode_ag_iterator_tag(
ag = 0;
while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag);
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag,
+ 0);
xfs_perag_put(pag);
if (error) {
last_error = error;
@@ -1322,13 +1409,10 @@ xfs_inode_free_eofblocks(
int flags,
void *args)
{
- int ret;
+ int ret = 0;
struct xfs_eofblocks *eofb = args;
- bool need_iolock = true;
int match;
- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
if (!xfs_can_free_eofblocks(ip, false)) {
/* inode could be preallocated or append-only */
trace_xfs_inode_free_eofblocks_invalid(ip);
@@ -1356,21 +1440,19 @@ xfs_inode_free_eofblocks(
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
-
- /*
- * A scan owner implies we already hold the iolock. Skip it in
- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
- * the possibility of EAGAIN being returned.
- */
- if (eofb->eof_scan_owner == ip->i_ino)
- need_iolock = false;
}
- ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
-
- /* don't revisit the inode if we're not waiting */
- if (ret == -EAGAIN && !(flags & SYNC_WAIT))
- ret = 0;
+ /*
+ * If the caller is waiting, return -EAGAIN to keep the background
+ * scanner moving and revisit the inode in a subsequent pass.
+ */
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ if (flags & SYNC_WAIT)
+ ret = -EAGAIN;
+ return ret;
+ }
+ ret = xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
@@ -1417,15 +1499,10 @@ __xfs_inode_free_quota_eofblocks(
struct xfs_eofblocks eofb = {0};
struct xfs_dquot *dq;
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-
/*
- * Set the scan owner to avoid a potential livelock. Otherwise, the scan
- * can repeatedly trylock on the inode we're currently processing. We
- * run a sync scan to increase effectiveness and use the union filter to
+ * Run a sync scan to increase effectiveness and use the union filter to
* cover all applicable quotas in a single scan.
*/
- eofb.eof_scan_owner = ip->i_ino;
eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
@@ -1577,12 +1654,9 @@ xfs_inode_free_cowblocks(
{
int ret;
struct xfs_eofblocks *eofb = args;
- bool need_iolock = true;
int match;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
@@ -1615,28 +1689,16 @@ xfs_inode_free_cowblocks(
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
-
- /*
- * A scan owner implies we already hold the iolock. Skip it in
- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
- * the possibility of EAGAIN being returned.
- */
- if (eofb->eof_scan_owner == ip->i_ino)
- need_iolock = false;
}
/* Free the CoW blocks */
- if (need_iolock) {
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
- xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- }
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
- if (need_iolock) {
- xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- }
+ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index a1e02f4708ab..bff4d85e5498 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,7 +27,6 @@ struct xfs_eofblocks {
kgid_t eof_gid;
prid_t eof_prid;
__u64 eof_min_file_size;
- xfs_ino_t eof_scan_owner;
};
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
@@ -48,6 +47,12 @@ struct xfs_eofblocks {
#define XFS_IGET_CREATE 0x1
#define XFS_IGET_UNTRUSTED 0x2
#define XFS_IGET_DONTCACHE 0x4
+#define XFS_IGET_INCORE 0x8 /* don't read from disk or reinit */
+
+/*
+ * flags for AG inode iterator
+ */
+#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
uint flags, uint lock_flags, xfs_inode_t **ipp);
@@ -80,6 +85,9 @@ void xfs_cowblocks_worker(struct work_struct *);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags, void *args),
int flags, void *args);
+int xfs_inode_ag_iterator_flags(struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, int flags, void *args),
+ int flags, void *args, int iter_flags);
int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags, void *args),
int flags, void *args, int tag);
@@ -102,7 +110,6 @@ xfs_fs_eofblocks_from_user(
dst->eof_flags = src->eof_flags;
dst->eof_prid = src->eof_prid;
dst->eof_min_file_size = src->eof_min_file_size;
- dst->eof_scan_owner = NULLFSINO;
dst->eof_uid = INVALID_UID;
if (src->eof_flags & XFS_EOF_FLAGS_UID) {
@@ -120,4 +127,7 @@ xfs_fs_eofblocks_from_user(
return 0;
}
+int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_ino_t ino, bool *inuse);
+
#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index de32f0fe47c8..ceef77c0416a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -50,6 +50,7 @@
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
+#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_inode_zone;
@@ -621,17 +622,17 @@ __xfs_iflock(
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
do {
- prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (xfs_isiflocked(ip))
io_schedule();
} while (!xfs_iflock_nowait(ip));
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
STATIC uint
_xfs_dic2xflags(
- __uint16_t di_flags,
+ uint16_t di_flags,
uint64_t di_flags2,
bool has_attr)
{
@@ -854,8 +855,8 @@ xfs_ialloc(
inode->i_version = 1;
ip->i_d.di_flags2 = 0;
ip->i_d.di_cowextsize = 0;
- ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
- ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
+ ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
+ ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
}
@@ -1615,7 +1616,7 @@ xfs_itruncate_extents(
/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
- last_block);
+ last_block, true);
if (error)
goto out;
@@ -1692,32 +1693,34 @@ xfs_release(
if (xfs_can_free_eofblocks(ip, false)) {
/*
+ * Check if the inode is being opened, written and closed
+ * frequently and we have delayed allocation blocks outstanding
+ * (e.g. streaming writes from the NFS server), truncating the
+ * blocks past EOF will cause fragmentation to occur.
+ *
+ * In this case don't do the truncation, but we have to be
+ * careful how we detect this case. Blocks beyond EOF show up as
+ * i_delayed_blks even when the inode is clean, so we need to
+ * truncate them away first before checking for a dirty release.
+ * Hence on the first dirty close we will still remove the
+ * speculative allocation, but after that we will leave it in
+ * place.
+ */
+ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+ return 0;
+ /*
* If we can't get the iolock just skip truncating the blocks
* past EOF because we could deadlock with the mmap_sem
- * otherwise. We'll get another chance to drop them once the
+ * otherwise. We'll get another chance to drop them once the
* last reference to the inode is dropped, so we'll never leak
* blocks permanently.
- *
- * Further, check if the inode is being opened, written and
- * closed frequently and we have delayed allocation blocks
- * outstanding (e.g. streaming writes from the NFS server),
- * truncating the blocks past EOF will cause fragmentation to
- * occur.
- *
- * In this case don't do the truncation, either, but we have to
- * be careful how we detect this case. Blocks beyond EOF show
- * up as i_delayed_blks even when the inode is clean, so we
- * need to truncate them away first before checking for a dirty
- * release. Hence on the first dirty close we will still remove
- * the speculative allocation, but after that we will leave it
- * in place.
*/
- if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
- return 0;
-
- error = xfs_free_eofblocks(mp, ip, true);
- if (error && error != -EAGAIN)
- return error;
+ if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ error = xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ if (error)
+ return error;
+ }
/* delalloc blocks after truncation means it really is dirty */
if (ip->i_delayed_blks)
@@ -1903,9 +1906,13 @@ xfs_inactive(
* force is true because we are evicting an inode from the
* cache. Post-eof blocks must be freed, lest we end up with
* broken free space accounting.
+ *
+ * Note: don't bother with iolock here since lockdep complains
+ * about acquiring it in reclaim context. We have the only
+ * reference to the inode at this point anyways.
*/
if (xfs_can_free_eofblocks(ip, true))
- xfs_free_eofblocks(mp, ip, false);
+ xfs_free_eofblocks(ip);
return;
}
@@ -2479,11 +2486,11 @@ __xfs_iunpin_wait(
xfs_iunpin(ip);
do {
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (xfs_ipincount(ip))
io_schedule();
} while (xfs_ipincount(ip));
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
void
@@ -3482,7 +3489,7 @@ xfs_iflush_int(
dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
- mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
+ mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
@@ -3492,7 +3499,7 @@ xfs_iflush_int(
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
+ mp, XFS_ERRTAG_IFLUSH_3)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad regular inode %Lu, ptr 0x%p",
__func__, ip->i_ino, ip);
@@ -3503,7 +3510,7 @@ xfs_iflush_int(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
- mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
+ mp, XFS_ERRTAG_IFLUSH_4)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad directory inode %Lu, ptr 0x%p",
__func__, ip->i_ino, ip);
@@ -3511,8 +3518,7 @@ xfs_iflush_int(
}
}
if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
- ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
- XFS_RANDOM_IFLUSH_5)) {
+ ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: detected corrupt incore inode %Lu, "
"total extents = %d, nblocks = %Ld, ptr 0x%p",
@@ -3522,7 +3528,7 @@ xfs_iflush_int(
goto corrupt_out;
}
if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
- mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
+ mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
@@ -3541,6 +3547,12 @@ xfs_iflush_int(
if (ip->i_d.di_version < 3)
ip->i_d.di_flushiter++;
+ /* Check the inline directory data. */
+ if (S_ISDIR(VFS_I(ip)->i_mode) &&
+ ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
+ xfs_dir2_sf_verify(ip))
+ goto corrupt_out;
+
/*
* Copy the dirty parts of the inode into the on-disk inode. We always
* copy out the core of the inode, because if the inode is dirty at all
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 10dcf27b4c85..0ee453de239a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -192,8 +192,8 @@ static inline void
xfs_set_projid(struct xfs_inode *ip,
prid_t projid)
{
- ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
- ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
+ ip->i_d.di_projid_hi = (uint16_t) (projid >> 16);
+ ip->i_d.di_projid_lo = (uint16_t) (projid & 0xffff);
}
static inline prid_t
@@ -216,7 +216,8 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE (1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
-#define XFS_INEW (1 << 3) /* inode has just been allocated */
+#define __XFS_INEW_BIT 3 /* inode has just been allocated */
+#define XFS_INEW (1 << __XFS_INEW_BIT)
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
@@ -444,9 +445,6 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
xfs_fsize_t isize, bool *did_zeroing);
int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
bool *did_zero);
-loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
- loff_t eof, int whence);
-
/* from xfs_iops.c */
extern void xfs_setup_inode(struct xfs_inode *ip);
@@ -464,6 +462,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
xfs_iflags_clear(ip, XFS_INEW);
barrier();
unlock_new_inode(VFS_I(ip));
+ wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
}
static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d90e7811ccdd..013cc78d7daf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -731,22 +731,27 @@ xfs_iflush_done(
* holding the lock before removing the inode from the AIL.
*/
if (need_ail) {
- struct xfs_log_item *log_items[need_ail];
- int i = 0;
+ bool mlip_changed = false;
+
+ /* this is an opencoded batch version of xfs_trans_ail_delete */
spin_lock(&ailp->xa_lock);
for (blip = lip; blip; blip = blip->li_bio_list) {
- iip = INODE_ITEM(blip);
- if (iip->ili_logged &&
- blip->li_lsn == iip->ili_flush_lsn) {
- log_items[i++] = blip;
- }
- ASSERT(i <= need_ail);
+ if (INODE_ITEM(blip)->ili_logged &&
+ blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
+ mlip_changed |= xfs_ail_delete_one(ailp, blip);
}
- /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
- xfs_trans_ail_delete_bulk(ailp, log_items, i,
- SHUTDOWN_CORRUPT_INCORE);
- }
+ if (mlip_changed) {
+ if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
+ xlog_assign_tail_lsn_locked(ailp->xa_mount);
+ if (list_empty(&ailp->xa_ail))
+ wake_up_all(&ailp->xa_empty);
+ }
+ spin_unlock(&ailp->xa_lock);
+
+ if (mlip_changed)
+ xfs_log_space_wake(ailp->xa_mount);
+ }
/*
* clean up and unlock the flush lock now we are done. We can clear the
@@ -829,9 +834,7 @@ xfs_inode_item_format_convert(
in_f->ilf_dsize = in_f32->ilf_dsize;
in_f->ilf_ino = in_f32->ilf_ino;
/* copy biggest field of ilf_u */
- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
- in_f32->ilf_u.ilfu_uuid.__u_bits,
- sizeof(uuid_t));
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
in_f->ilf_blkno = in_f32->ilf_blkno;
in_f->ilf_len = in_f32->ilf_len;
in_f->ilf_boffset = in_f32->ilf_boffset;
@@ -846,9 +849,7 @@ xfs_inode_item_format_convert(
in_f->ilf_dsize = in_f64->ilf_dsize;
in_f->ilf_ino = in_f64->ilf_ino;
/* copy biggest field of ilf_u */
- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
- in_f64->ilf_u.ilfu_uuid.__u_bits,
- sizeof(uuid_t));
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
in_f->ilf_blkno = in_f64->ilf_blkno;
in_f->ilf_len = in_f64->ilf_len;
in_f->ilf_boffset = in_f64->ilf_boffset;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c67cfb451fd3..9c0c7a920304 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -41,8 +41,12 @@
#include "xfs_trans.h"
#include "xfs_pnfs.h"
#include "xfs_acl.h"
+#include "xfs_btree.h"
+#include <linux/fsmap.h>
+#include "xfs_fsmap.h"
#include <linux/capability.h>
+#include <linux/cred.h>
#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/namei.h>
@@ -116,8 +120,7 @@ xfs_find_handle(
handle.ha_fid.fid_pad = 0;
handle.ha_fid.fid_gen = inode->i_generation;
handle.ha_fid.fid_ino = ip->i_ino;
-
- hsize = XFS_HSIZE(handle);
+ hsize = sizeof(xfs_handle_t);
}
error = -EFAULT;
@@ -440,8 +443,8 @@ xfs_attrmulti_attr_get(
struct inode *inode,
unsigned char *name,
unsigned char __user *ubuf,
- __uint32_t *len,
- __uint32_t flags)
+ uint32_t *len,
+ uint32_t flags)
{
unsigned char *kbuf;
int error = -EFAULT;
@@ -469,8 +472,8 @@ xfs_attrmulti_attr_set(
struct inode *inode,
unsigned char *name,
const unsigned char __user *ubuf,
- __uint32_t len,
- __uint32_t flags)
+ uint32_t len,
+ uint32_t flags)
{
unsigned char *kbuf;
int error;
@@ -495,7 +498,7 @@ int
xfs_attrmulti_attr_remove(
struct inode *inode,
unsigned char *name,
- __uint32_t flags)
+ uint32_t flags)
{
int error;
@@ -873,7 +876,7 @@ xfs_merge_ioc_xflags(
STATIC unsigned int
xfs_di2lxflags(
- __uint16_t di_flags)
+ uint16_t di_flags)
{
unsigned int flags = 0;
@@ -1284,7 +1287,7 @@ xfs_ioctl_setattr_check_projid(
struct fsxattr *fa)
{
/* Disallow 32bit project ids if projid32bit feature is not enabled. */
- if (fa->fsx_projid > (__uint16_t)-1 &&
+ if (fa->fsx_projid > (uint16_t)-1 &&
!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
return -EINVAL;
@@ -1524,7 +1527,7 @@ out_drop_write:
}
STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+xfs_getbmap_format(void **ap, struct getbmapx *bmv)
{
struct getbmap __user *base = (struct getbmap __user *)*ap;
@@ -1542,10 +1545,11 @@ xfs_ioc_getbmap(
unsigned int cmd,
void __user *arg)
{
- struct getbmapx bmx;
+ struct getbmapx bmx = { 0 };
int error;
- if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
+ /* struct getbmap is a strict subset of struct getbmapx. */
+ if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
return -EFAULT;
if (bmx.bmv_count < 2)
@@ -1567,7 +1571,7 @@ xfs_ioc_getbmap(
}
STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv)
{
struct getbmapx __user *base = (struct getbmapx __user *)*ap;
@@ -1607,6 +1611,84 @@ xfs_ioc_getbmapx(
return 0;
}
+struct getfsmap_info {
+ struct xfs_mount *mp;
+ struct fsmap_head __user *data;
+ unsigned int idx;
+ __u32 last_flags;
+};
+
+STATIC int
+xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
+{
+ struct getfsmap_info *info = priv;
+ struct fsmap fm;
+
+ trace_xfs_getfsmap_mapping(info->mp, xfm);
+
+ info->last_flags = xfm->fmr_flags;
+ xfs_fsmap_from_internal(&fm, xfm);
+ if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
+ sizeof(struct fsmap)))
+ return -EFAULT;
+
+ return 0;
+}
+
+STATIC int
+xfs_ioc_getfsmap(
+ struct xfs_inode *ip,
+ struct fsmap_head __user *arg)
+{
+ struct getfsmap_info info = { NULL };
+ struct xfs_fsmap_head xhead = {0};
+ struct fsmap_head head;
+ bool aborted = false;
+ int error;
+
+ if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
+ return -EFAULT;
+ if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
+ memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
+ sizeof(head.fmh_keys[0].fmr_reserved)) ||
+ memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
+ sizeof(head.fmh_keys[1].fmr_reserved)))
+ return -EINVAL;
+
+ xhead.fmh_iflags = head.fmh_iflags;
+ xhead.fmh_count = head.fmh_count;
+ xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
+ xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
+
+ trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
+ trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
+
+ info.mp = ip->i_mount;
+ info.data = arg;
+ error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
+ if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ error = 0;
+ aborted = true;
+ } else if (error)
+ return error;
+
+ /* If we didn't abort, set the "last" flag in the last fmx */
+ if (!aborted && info.idx) {
+ info.last_flags |= FMR_OF_LAST;
+ if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
+ &info.last_flags, sizeof(info.last_flags)))
+ return -EFAULT;
+ }
+
+ /* copy back header */
+ head.fmh_entries = xhead.fmh_entries;
+ head.fmh_oflags = xhead.fmh_oflags;
+ if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
+ return -EFAULT;
+
+ return 0;
+}
+
int
xfs_ioc_swapext(
xfs_swapext_t *sxp)
@@ -1787,6 +1869,9 @@ xfs_file_ioctl(
case XFS_IOC_GETBMAPX:
return xfs_ioc_getbmapx(ip, arg);
+ case FS_IOC_GETFSMAP:
+ return xfs_ioc_getfsmap(ip, arg);
+
case XFS_IOC_FD_TO_HANDLE:
case XFS_IOC_PATH_TO_HANDLE:
case XFS_IOC_PATH_TO_FSHANDLE: {
@@ -1846,7 +1931,7 @@ xfs_file_ioctl(
case XFS_IOC_SET_RESBLKS: {
xfs_fsop_resblks_t inout;
- __uint64_t in;
+ uint64_t in;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1932,12 +2017,12 @@ xfs_file_ioctl(
}
case XFS_IOC_GOINGDOWN: {
- __uint32_t in;
+ uint32_t in;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (get_user(in, (__uint32_t __user *)arg))
+ if (get_user(in, (uint32_t __user *)arg))
return -EFAULT;
return xfs_fs_goingdown(mp, in);
@@ -1952,14 +2037,14 @@ xfs_file_ioctl(
if (copy_from_user(&in, arg, sizeof(in)))
return -EFAULT;
- return xfs_errortag_add(in.errtag, mp);
+ return xfs_errortag_add(mp, in.errtag);
}
case XFS_IOC_ERROR_CLEARALL:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- return xfs_errortag_clearall(mp, 1);
+ return xfs_errortag_clearall(mp);
case XFS_IOC_FREE_EOFBLOCKS: {
struct xfs_fs_eofblocks eofb;
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 8b52881bfd90..e86c3ea137d2 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -48,22 +48,22 @@ xfs_attrmulti_attr_get(
struct inode *inode,
unsigned char *name,
unsigned char __user *ubuf,
- __uint32_t *len,
- __uint32_t flags);
+ uint32_t *len,
+ uint32_t flags);
extern int
xfs_attrmulti_attr_set(
struct inode *inode,
unsigned char *name,
const unsigned char __user *ubuf,
- __uint32_t len,
- __uint32_t flags);
+ uint32_t len,
+ uint32_t flags);
extern int
xfs_attrmulti_attr_remove(
struct inode *inode,
unsigned char *name,
- __uint32_t flags);
+ uint32_t flags);
extern struct dentry *
xfs_handle_to_dentry(
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 7c49938c5aed..fa0bc4d46065 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -20,6 +20,7 @@
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/fsmap.h>
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
@@ -554,6 +555,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_GOINGDOWN:
case XFS_IOC_ERROR_INJECTION:
case XFS_IOC_ERROR_CLEARALL:
+ case FS_IOC_GETFSMAP:
return xfs_file_ioctl(filp, cmd, p);
#ifndef BROKEN_X86_ALIGNMENT
/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index b1bb45444df8..5492bcf6f442 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -112,9 +112,9 @@ typedef struct compat_xfs_fsop_handlereq {
/* The bstat field in the swapext struct needs translation */
typedef struct compat_xfs_swapext {
- __int64_t sx_version; /* version */
- __int64_t sx_fdtarget; /* fd of target file */
- __int64_t sx_fdtmp; /* fd of tmp file */
+ int64_t sx_version; /* version */
+ int64_t sx_fdtarget; /* fd of target file */
+ int64_t sx_fdtmp; /* fd of tmp file */
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1aa3abd67b36..813394c62849 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -162,7 +162,7 @@ xfs_iomap_write_direct(
xfs_fileoff_t last_fsb;
xfs_filblks_t count_fsb, resaligned;
xfs_fsblock_t firstfsb;
- xfs_extlen_t extsz, temp;
+ xfs_extlen_t extsz;
int nimaps;
int quota_flag;
int rt;
@@ -203,14 +203,7 @@ xfs_iomap_write_direct(
}
count_fsb = last_fsb - offset_fsb;
ASSERT(count_fsb > 0);
-
- resaligned = count_fsb;
- if (unlikely(extsz)) {
- if ((temp = do_mod(offset_fsb, extsz)))
- resaligned += temp;
- if ((temp = do_mod(resaligned, extsz)))
- resaligned += extsz - temp;
- }
+ resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz);
if (unlikely(rt)) {
resrtextents = qblocks = resaligned;
@@ -247,7 +240,7 @@ xfs_iomap_write_direct(
*/
if (IS_DAX(VFS_I(ip))) {
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
- if (ISUNWRITTEN(imap)) {
+ if (imap->br_state == XFS_EXT_UNWRITTEN) {
tflags |= XFS_TRANS_RESERVE;
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
}
@@ -550,7 +543,7 @@ xfs_file_iomap_begin_delay(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto out_unlock;
@@ -637,6 +630,11 @@ retry:
goto out_unlock;
}
+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -685,7 +683,7 @@ xfs_iomap_write_allocate(
int nres;
if (whichfork == XFS_COW_FORK)
- flags |= XFS_BMAPI_COWFORK;
+ flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
/*
* Make sure that the dquots are there.
@@ -947,7 +945,7 @@ static inline bool imap_needs_alloc(struct inode *inode,
return !nimaps ||
imap->br_startblock == HOLESTARTBLOCK ||
imap->br_startblock == DELAYSTARTBLOCK ||
- (IS_DAX(inode) && ISUNWRITTEN(imap));
+ (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
}
static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
@@ -978,6 +976,7 @@ xfs_file_iomap_begin(
int nimaps = 1, error = 0;
bool shared = false, trimmed = false;
unsigned lockmode;
+ struct block_device *bdev;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -996,53 +995,51 @@ xfs_file_iomap_begin(
lockmode = xfs_ilock_data_map_shared(ip);
}
+ if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
length = mp->m_super->s_maxbytes - offset;
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
- if (xfs_is_reflink_inode(ip) &&
- (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
- shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
- if (shared) {
- xfs_iunlock(ip, lockmode);
- goto alloc_done;
- }
- ASSERT(!isnullstartblock(imap.br_startblock));
- }
-
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error)
goto out_unlock;
- if ((flags & IOMAP_REPORT) ||
- (xfs_is_reflink_inode(ip) &&
- (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
+ if (flags & IOMAP_REPORT) {
/* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
&trimmed);
if (error)
goto out_unlock;
-
- /*
- * We're here because we're trying to do a directio write to a
- * region that isn't aligned to a filesystem block. If the
- * extent is shared, fall back to buffered mode to handle the
- * RMW.
- */
- if (!(flags & IOMAP_REPORT) && shared) {
- trace_xfs_reflink_bounce_dio_write(ip, &imap);
- error = -EREMCHG;
- goto out_unlock;
- }
}
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_reserve_cow(ip, &imap, &shared);
- if (error)
- goto out_unlock;
+ if (flags & IOMAP_DIRECT) {
+ /*
+ * A reflinked inode will result in CoW alloc.
+ * FIXME: It could still overwrite on unshared extents
+ * and not need allocation.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /* may drop and re-acquire the ilock */
+ error = xfs_reflink_allocate_cow(ip, &imap, &shared,
+ &lockmode);
+ if (error)
+ goto out_unlock;
+ } else {
+ error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+ if (error)
+ goto out_unlock;
+ }
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
@@ -1050,6 +1047,14 @@ xfs_file_iomap_begin(
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
/*
+ * If nowait is set bail since we are going to make
+ * allocations.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
@@ -1071,7 +1076,6 @@ xfs_file_iomap_begin(
if (error)
return error;
-alloc_done:
iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
} else {
@@ -1082,6 +1086,14 @@ alloc_done:
}
xfs_bmbt_to_iomap(ip, iomap, &imap);
+
+ /* optionally associate a dax device with the iomap bdev */
+ bdev = iomap->bdev;
+ if (blk_queue_dax(bdev->bd_queue))
+ iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
+ else
+ iomap->dax_dev = NULL;
+
if (shared)
iomap->flags |= IOMAP_F_SHARED;
return 0;
@@ -1095,25 +1107,46 @@ xfs_file_iomap_end_delalloc(
struct xfs_inode *ip,
loff_t offset,
loff_t length,
- ssize_t written)
+ ssize_t written,
+ struct iomap *iomap)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
xfs_fileoff_t end_fsb;
int error = 0;
- start_fsb = XFS_B_TO_FSB(mp, offset + written);
+ /*
+ * Behave as if the write failed if drop writes is enabled. Set the NEW
+ * flag to force delalloc cleanup.
+ */
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
+ iomap->flags |= IOMAP_F_NEW;
+ written = 0;
+ }
+
+ /*
+ * start_fsb refers to the first unused block after a short write. If
+ * nothing was written, round offset down to point at the first block in
+ * the range.
+ */
+ if (unlikely(!written))
+ start_fsb = XFS_B_TO_FSBT(mp, offset);
+ else
+ start_fsb = XFS_B_TO_FSB(mp, offset + written);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
/*
- * Trim back delalloc blocks if we didn't manage to write the whole
- * range reserved.
+ * Trim delalloc blocks if they were allocated by this write and we
+ * didn't manage to write the whole range.
*
* We don't need to care about racing delalloc as we hold i_mutex
* across the reserve/allocate/unreserve calls. If there are delalloc
* blocks in the range, they are ours.
*/
- if (start_fsb < end_fsb) {
+ if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
+ truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
+ XFS_FSB_TO_B(mp, end_fsb) - 1);
+
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
end_fsb - start_fsb);
@@ -1138,13 +1171,14 @@ xfs_file_iomap_end(
unsigned flags,
struct iomap *iomap)
{
+ fs_put_dax(iomap->dax_dev);
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
- length, written);
+ length, written, iomap);
return 0;
}
-struct iomap_ops xfs_iomap_ops = {
+const struct iomap_ops xfs_iomap_ops = {
.iomap_begin = xfs_file_iomap_begin,
.iomap_end = xfs_file_iomap_end,
};
@@ -1168,10 +1202,10 @@ xfs_xattr_iomap_begin(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- lockmode = xfs_ilock_data_map_shared(ip);
+ lockmode = xfs_ilock_attr_map_shared(ip);
/* if there are no attribute fork or extents, return ENOENT */
- if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+ if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
error = -ENOENT;
goto out_unlock;
}
@@ -1190,6 +1224,6 @@ out_unlock:
return error;
}
-struct iomap_ops xfs_xattr_iomap_ops = {
+const struct iomap_ops xfs_xattr_iomap_ops = {
.iomap_begin = xfs_xattr_iomap_begin,
};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 6d45cf01fcff..00db3ecea084 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -33,7 +33,27 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
struct xfs_bmbt_irec *);
xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize);
-extern struct iomap_ops xfs_iomap_ops;
-extern struct iomap_ops xfs_xattr_iomap_ops;
+static inline xfs_filblks_t
+xfs_aligned_fsb_count(
+ xfs_fileoff_t offset_fsb,
+ xfs_filblks_t count_fsb,
+ xfs_extlen_t extsz)
+{
+ if (extsz) {
+ xfs_extlen_t align;
+
+ align = do_mod(offset_fsb, extsz);
+ if (align)
+ count_fsb += align;
+ align = do_mod(count_fsb, extsz);
+ if (align)
+ count_fsb += extsz - align;
+ }
+
+ return count_fsb;
+}
+
+extern const struct iomap_ops xfs_iomap_ops;
+extern const struct iomap_ops xfs_xattr_iomap_ops;
#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 22c16155f1b4..469c9fa4c178 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -190,12 +190,12 @@ xfs_generic_create(
#ifdef CONFIG_XFS_POSIX_ACL
if (default_acl) {
- error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
if (error)
goto out_cleanup_inode;
}
if (acl) {
- error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
if (error)
goto out_cleanup_inode;
}
@@ -460,7 +460,7 @@ xfs_vn_get_link(
if (!dentry)
return ERR_PTR(-ECHILD);
- link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+ link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
if (!link)
goto out_err;
@@ -489,11 +489,12 @@ xfs_vn_get_link_inline(
STATIC int
xfs_vn_getattr(
- struct vfsmount *mnt,
- struct dentry *dentry,
- struct kstat *stat)
+ const struct path *path,
+ struct kstat *stat,
+ u32 request_mask,
+ unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -515,6 +516,20 @@ xfs_vn_getattr(
stat->blocks =
XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+ if (ip->i_d.di_version == 3) {
+ if (request_mask & STATX_BTIME) {
+ stat->result_mask |= STATX_BTIME;
+ stat->btime.tv_sec = ip->i_d.di_crtime.t_sec;
+ stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
+ }
+ }
+
+ if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+ if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP)
+ stat->attributes |= STATX_ATTR_NODUMP;
switch (inode->i_mode & S_IFMT) {
case S_IFBLK:
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 66e881790c17..c393a2f6d8c3 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -31,7 +31,7 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
-STATIC int
+int
xfs_internal_inum(
xfs_mount_t *mp,
xfs_ino_t ino)
@@ -361,7 +361,6 @@ xfs_bulkstat(
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- size_t irbsize; /* size of irec buffer in bytes */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
int nirbuf; /* size of irbuf */
int ubcount; /* size of user's buffer */
@@ -388,11 +387,10 @@ xfs_bulkstat(
*ubcountp = 0;
*done = 0;
- irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+ irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
if (!irbuf)
return -ENOMEM;
-
- nirbuf = irbsize / sizeof(*irbuf);
+ nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
/*
* Loop over the allocation groups, starting from the last
@@ -585,7 +583,7 @@ xfs_inumbers(
return error;
bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
- buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
+ buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
do {
struct xfs_inobt_rec_incore r;
int stat;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 6ea8b3912fa4..17e86e0541af 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -96,4 +96,6 @@ xfs_inumbers(
void __user *buffer, /* buffer with inode info */
inumbers_fmt_pf formatter);
+int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+
#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 7a989de224f4..9301c5a6060b 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -19,18 +19,11 @@
#define __XFS_LINUX__
#include <linux/types.h>
+#include <linux/uuid.h>
/*
* Kernel specific type declarations for XFS
*/
-typedef signed char __int8_t;
-typedef unsigned char __uint8_t;
-typedef signed short int __int16_t;
-typedef unsigned short int __uint16_t;
-typedef signed int __int32_t;
-typedef unsigned int __uint32_t;
-typedef signed long long int __int64_t;
-typedef unsigned long long int __uint64_t;
typedef __s64 xfs_off_t; /* <file offset> type */
typedef unsigned long long xfs_ino_t; /* <inode> type */
@@ -42,7 +35,6 @@ typedef __u32 xfs_nlink_t;
#include "kmem.h"
#include "mrlock.h"
-#include "uuid.h"
#include <linux/semaphore.h>
#include <linux/mm.h>
@@ -55,7 +47,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/file.h>
#include <linux/swap.h>
#include <linux/errno.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/bitops.h>
#include <linux/major.h>
#include <linux/pagemap.h>
@@ -151,7 +143,6 @@ typedef __u32 xfs_nlink_t;
#define __return_address __builtin_return_address(0)
#define XFS_PROJID_DEFAULT 0
-#define MAXPATHLEN 1024
#define MIN(a,b) (min(a,b))
#define MAX(a,b) (max(a,b))
@@ -186,22 +177,22 @@ extern struct xstats xfsstats;
* are converting to the init_user_ns. The uid is later mapped to a particular
* user namespace value when crossing the kernel/user boundary.
*/
-static inline __uint32_t xfs_kuid_to_uid(kuid_t uid)
+static inline uint32_t xfs_kuid_to_uid(kuid_t uid)
{
return from_kuid(&init_user_ns, uid);
}
-static inline kuid_t xfs_uid_to_kuid(__uint32_t uid)
+static inline kuid_t xfs_uid_to_kuid(uint32_t uid)
{
return make_kuid(&init_user_ns, uid);
}
-static inline __uint32_t xfs_kgid_to_gid(kgid_t gid)
+static inline uint32_t xfs_kgid_to_gid(kgid_t gid)
{
return from_kgid(&init_user_ns, gid);
}
-static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
+static inline kgid_t xfs_gid_to_kgid(uint32_t gid)
{
return make_kgid(&init_user_ns, gid);
}
@@ -212,88 +203,6 @@ static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
#define xfs_stack_trace() dump_stack()
-
-/* Move the kernel do_div definition off to one side */
-
-#if defined __i386__
-/* For ia32 we need to pull some tricks to get past various versions
- * of the compiler which do not like us using do_div in the middle
- * of large functions.
- */
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
- __u32 mod;
-
- switch (n) {
- case 4:
- mod = *(__u32 *)a % b;
- *(__u32 *)a = *(__u32 *)a / b;
- return mod;
- case 8:
- {
- unsigned long __upper, __low, __high, __mod;
- __u64 c = *(__u64 *)a;
- __upper = __high = c >> 32;
- __low = c;
- if (__high) {
- __upper = __high % (b);
- __high = __high / (b);
- }
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
- asm("":"=A" (c):"a" (__low),"d" (__high));
- *(__u64 *)a = c;
- return __mod;
- }
- }
-
- /* NOTREACHED */
- return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
- switch (n) {
- case 4:
- return *(__u32 *)a % b;
- case 8:
- {
- unsigned long __upper, __low, __high, __mod;
- __u64 c = *(__u64 *)a;
- __upper = __high = c >> 32;
- __low = c;
- if (__high) {
- __upper = __high % (b);
- __high = __high / (b);
- }
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
- asm("":"=A" (c):"a" (__low),"d" (__high));
- return __mod;
- }
- }
-
- /* NOTREACHED */
- return 0;
-}
-#else
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
- __u32 mod;
-
- switch (n) {
- case 4:
- mod = *(__u32 *)a % b;
- *(__u32 *)a = *(__u32 *)a / b;
- return mod;
- case 8:
- mod = do_div(*(__u64 *)a, b);
- return mod;
- }
-
- /* NOTREACHED */
- return 0;
-}
-
/* Side effect free 64 bit mod operation */
static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
{
@@ -310,20 +219,17 @@ static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
/* NOTREACHED */
return 0;
}
-#endif
-#undef do_div
-#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a))
#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a))
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+static inline uint64_t roundup_64(uint64_t x, uint32_t y)
{
x += y - 1;
do_div(x, y);
return x * y;
}
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+static inline uint64_t howmany_64(uint64_t x, uint32_t y)
{
x += y - 1;
do_div(x, y);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b1469f0a91a6..0053bcf2b10a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -434,7 +434,7 @@ xfs_log_reserve(
int unit_bytes,
int cnt,
struct xlog_ticket **ticp,
- __uint8_t client,
+ uint8_t client,
bool permanent)
{
struct xlog *log = mp->m_log;
@@ -825,9 +825,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!error) {
/* the data section must be 32 bit size aligned */
struct {
- __uint16_t magic;
- __uint16_t pad1;
- __uint32_t pad2; /* may as well make it 64 bits */
+ uint16_t magic;
+ uint16_t pad1;
+ uint32_t pad2; /* may as well make it 64 bits */
} magic = {
.magic = XLOG_UNMOUNT_TYPE,
};
@@ -1189,8 +1189,7 @@ xlog_iodone(xfs_buf_t *bp)
* IOABORT state. The IOABORT state is only set in DEBUG mode to inject
* CRC errors into log recovery.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
- XFS_RANDOM_IODONE_IOERR) ||
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
iclog->ic_state & XLOG_STATE_IOABORT) {
if (iclog->ic_state & XLOG_STATE_IOABORT)
iclog->ic_state &= ~XLOG_STATE_IOABORT;
@@ -1293,7 +1292,7 @@ void
xfs_log_work_queue(
struct xfs_mount *mp)
{
- queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work,
+ queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
msecs_to_jiffies(xfs_syncd_centisecs * 10));
}
@@ -1665,7 +1664,7 @@ xlog_cksum(
char *dp,
int size)
{
- __uint32_t crc;
+ uint32_t crc;
/* first generate the crc for the record header ... */
crc = xfs_start_cksum_update((char *)rhead,
@@ -1828,7 +1827,7 @@ xlog_sync(
*/
dptr = (char *)&iclog->ic_header + count;
for (i = 0; i < split; i += BBSIZE) {
- __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
+ uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
if (++cycle == XLOG_HEADER_MAGIC_NUM)
cycle++;
*(__be32 *)dptr = cpu_to_be32(cycle);
@@ -1842,7 +1841,6 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
iclog->ic_datap, size);
-#ifdef DEBUG
/*
* Intentionally corrupt the log record CRC based on the error injection
* frequency, if defined. This facilitates testing log recovery in the
@@ -1850,15 +1848,13 @@ xlog_sync(
* write on I/O completion and shutdown the fs. The subsequent mount
* detects the bad CRC and attempts to recover.
*/
- if (log->l_badcrc_factor &&
- (prandom_u32() % log->l_badcrc_factor == 0)) {
- iclog->ic_header.h_crc &= 0xAAAAAAAA;
+ if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
+ iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
iclog->ic_state |= XLOG_STATE_IOABORT;
xfs_warn(log->l_mp,
"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
be64_to_cpu(iclog->ic_header.h_lsn));
}
-#endif
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
@@ -2024,7 +2020,7 @@ xlog_print_tic_res(
};
#undef REG_TYPE_STR
- xfs_warn(mp, "xlog_write: reservation summary:");
+ xfs_warn(mp, "ticket reservation summary:");
xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res);
xfs_warn(mp, " current res = %d bytes",
@@ -2045,10 +2041,55 @@ xlog_print_tic_res(
"bad-rtype" : res_type_str[r_type]),
ticket->t_res_arr[i].r_len);
}
+}
+
+/*
+ * Print a summary of the transaction.
+ */
+void
+xlog_print_trans(
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_log_item_desc *lidp;
+
+ /* dump core transaction and ticket info */
+ xfs_warn(mp, "transaction summary:");
+ xfs_warn(mp, " flags = 0x%x", tp->t_flags);
+
+ xlog_print_tic_res(mp, tp->t_ticket);
- xfs_alert_tag(mp, XFS_PTAG_LOGRES,
- "xlog_write: reservation ran out. Need to up reservation");
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+ /* dump each log item */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+ struct xfs_log_vec *lv = lip->li_lv;
+ struct xfs_log_iovec *vec;
+ int i;
+
+ xfs_warn(mp, "log item: ");
+ xfs_warn(mp, " type = 0x%x", lip->li_type);
+ xfs_warn(mp, " flags = 0x%x", lip->li_flags);
+ if (!lv)
+ continue;
+ xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
+ xfs_warn(mp, " size = %d", lv->lv_size);
+ xfs_warn(mp, " bytes = %d", lv->lv_bytes);
+ xfs_warn(mp, " buf len = %d", lv->lv_buf_len);
+
+ /* dump each iovec for the log item */
+ vec = lv->lv_iovecp;
+ for (i = 0; i < lv->lv_niovecs; i++) {
+ int dumplen = min(vec->i_len, 32);
+
+ xfs_warn(mp, " iovec[%d]", i);
+ xfs_warn(mp, " type = 0x%x", vec->i_type);
+ xfs_warn(mp, " len = %d", vec->i_len);
+ xfs_warn(mp, " first %d bytes of iovec[%d]:", dumplen, i);
+ xfs_hex_dump(vec->i_addr, dumplen);
+
+ vec++;
+ }
+ }
}
/*
@@ -2321,8 +2362,12 @@ xlog_write(
if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
ticket->t_curr_res -= sizeof(xlog_op_header_t);
- if (ticket->t_curr_res < 0)
+ if (ticket->t_curr_res < 0) {
+ xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+ "ctx ticket reservation ran out. Need to up reservation");
xlog_print_tic_res(log->l_mp, ticket);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ }
index = 0;
lv = log_vector;
@@ -2363,8 +2408,8 @@ xlog_write(
}
reg = &vecp[index];
- ASSERT(reg->i_len % sizeof(__int32_t) == 0);
- ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
+ ASSERT(reg->i_len % sizeof(int32_t) == 0);
+ ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
start_rec_copy = xlog_write_start_rec(ptr, ticket);
if (start_rec_copy) {
@@ -3143,7 +3188,7 @@ xlog_state_switch_iclogs(
/* Round up to next log-sunit */
if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
log->l_mp->m_sb.sb_logsunit > 1) {
- __uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
+ uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
}
@@ -3771,7 +3816,7 @@ xlog_verify_iclog(
xlog_in_core_2_t *xhdr;
void *base_ptr, *ptr, *p;
ptrdiff_t field_offset;
- __uint8_t clientid;
+ uint8_t clientid;
int len, i, j, k, op_len;
int idx;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index b5e71072fde5..bf212772595c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -124,7 +124,6 @@ struct xlog_ticket;
struct xfs_log_item;
struct xfs_item_ops;
struct xfs_trans;
-struct xfs_log_callback;
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
@@ -160,7 +159,7 @@ int xfs_log_reserve(struct xfs_mount *mp,
int length,
int count,
struct xlog_ticket **ticket,
- __uint8_t clientid,
+ uint8_t clientid,
bool permanent);
int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
void xfs_log_unmount(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index a4ab192e1792..fbe72b134bef 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -30,6 +30,9 @@
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_trace.h"
+
+struct workqueue_struct *xfs_discard_wq;
/*
* Allocate a new ticket. Failing to get a new ticket makes it really hard to
@@ -407,6 +410,7 @@ xlog_cil_insert_items(
int len = 0;
int diff_iovecs = 0;
int iclog_space;
+ int iovhdr_res = 0, split_res = 0, ctx_res = 0;
ASSERT(tp);
@@ -416,30 +420,11 @@ xlog_cil_insert_items(
*/
xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
- /*
- * Now (re-)position everything modified at the tail of the CIL.
- * We do this here so we only need to take the CIL lock once during
- * the transaction commit.
- */
spin_lock(&cil->xc_cil_lock);
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- struct xfs_log_item *lip = lidp->lid_item;
-
- /* Skip items which aren't dirty in this transaction. */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- continue;
-
- /*
- * Only move the item if it isn't already at the tail. This is
- * to prevent a transient list_empty() state when reinserting
- * an item that is already the only item in the CIL.
- */
- if (!list_is_last(&lip->li_cil, &cil->xc_cil))
- list_move_tail(&lip->li_cil, &cil->xc_cil);
- }
/* account for space used by new iovec headers */
- len += diff_iovecs * sizeof(xlog_op_header_t);
+ iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
+ len += iovhdr_res;
ctx->nvecs += diff_iovecs;
/* attach the transaction to the CIL if it has any busy extents */
@@ -454,28 +439,66 @@ xlog_cil_insert_items(
* during the transaction commit.
*/
if (ctx->ticket->t_curr_res == 0) {
- ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
- tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
+ ctx_res = ctx->ticket->t_unit_res;
+ ctx->ticket->t_curr_res = ctx_res;
+ tp->t_ticket->t_curr_res -= ctx_res;
}
/* do we need space for more log record headers? */
iclog_space = log->l_iclog_size - log->l_iclog_hsize;
if (len > 0 && (ctx->space_used / iclog_space !=
(ctx->space_used + len) / iclog_space)) {
- int hdrs;
-
- hdrs = (len + iclog_space - 1) / iclog_space;
+ split_res = (len + iclog_space - 1) / iclog_space;
/* need to take into account split region headers, too */
- hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
- ctx->ticket->t_unit_res += hdrs;
- ctx->ticket->t_curr_res += hdrs;
- tp->t_ticket->t_curr_res -= hdrs;
+ split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+ ctx->ticket->t_unit_res += split_res;
+ ctx->ticket->t_curr_res += split_res;
+ tp->t_ticket->t_curr_res -= split_res;
ASSERT(tp->t_ticket->t_curr_res >= len);
}
tp->t_ticket->t_curr_res -= len;
ctx->space_used += len;
+ /*
+ * If we've overrun the reservation, dump the tx details before we move
+ * the log items. Shutdown is imminent...
+ */
+ if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
+ xfs_warn(log->l_mp, "Transaction log reservation overrun:");
+ xfs_warn(log->l_mp,
+ " log items: %d bytes (iov hdrs: %d bytes)",
+ len, iovhdr_res);
+ xfs_warn(log->l_mp, " split region headers: %d bytes",
+ split_res);
+ xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res);
+ xlog_print_trans(tp);
+ }
+
+ /*
+ * Now (re-)position everything modified at the tail of the CIL.
+ * We do this here so we only need to take the CIL lock once during
+ * the transaction commit.
+ */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
+
+ /*
+ * Only move the item if it isn't already at the tail. This is
+ * to prevent a transient list_empty() state when reinserting
+ * an item that is already the only item in the CIL.
+ */
+ if (!list_is_last(&lip->li_cil, &cil->xc_cil))
+ list_move_tail(&lip->li_cil, &cil->xc_cil);
+ }
+
spin_unlock(&cil->xc_cil_lock);
+
+ if (tp->t_ticket->t_curr_res < 0)
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
}
static void
@@ -491,6 +514,75 @@ xlog_cil_free_logvec(
}
}
+static void
+xlog_discard_endio_work(
+ struct work_struct *work)
+{
+ struct xfs_cil_ctx *ctx =
+ container_of(work, struct xfs_cil_ctx, discard_endio_work);
+ struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
+
+ xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
+ kmem_free(ctx);
+}
+
+/*
+ * Queue up the actual completion to a thread to avoid IRQ-safe locking for
+ * pagb_lock. Note that we need a unbounded workqueue, otherwise we might
+ * get the execution delayed up to 30 seconds for weird reasons.
+ */
+static void
+xlog_discard_endio(
+ struct bio *bio)
+{
+ struct xfs_cil_ctx *ctx = bio->bi_private;
+
+ INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work);
+ queue_work(xfs_discard_wq, &ctx->discard_endio_work);
+}
+
+static void
+xlog_discard_busy_extents(
+ struct xfs_mount *mp,
+ struct xfs_cil_ctx *ctx)
+{
+ struct list_head *list = &ctx->busy_extents;
+ struct xfs_extent_busy *busyp;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int error = 0;
+
+ ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
+
+ blk_start_plug(&plug);
+ list_for_each_entry(busyp, list, list) {
+ trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
+ busyp->length);
+
+ error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+ XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+ XFS_FSB_TO_BB(mp, busyp->length),
+ GFP_NOFS, 0, &bio);
+ if (error && error != -EOPNOTSUPP) {
+ xfs_info(mp,
+ "discard failed for extent [0x%llx,%u], error %d",
+ (unsigned long long)busyp->bno,
+ busyp->length,
+ error);
+ break;
+ }
+ }
+
+ if (bio) {
+ bio->bi_private = ctx;
+ bio->bi_end_io = xlog_discard_endio;
+ submit_bio(bio);
+ } else {
+ xlog_discard_endio_work(&ctx->discard_endio_work);
+ }
+ blk_finish_plug(&plug);
+}
+
/*
* Mark all items committed and clear busy extents. We free the log vector
* chains in a separate pass so that we unpin the log items as quickly as
@@ -525,14 +617,10 @@ xlog_cil_committed(
xlog_cil_free_logvec(ctx->lv_chain);
- if (!list_empty(&ctx->busy_extents)) {
- ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
-
- xfs_discard_extents(mp, &ctx->busy_extents);
- xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
- }
-
- kmem_free(ctx);
+ if (!list_empty(&ctx->busy_extents))
+ xlog_discard_busy_extents(mp, ctx);
+ else
+ kmem_free(ctx);
}
/*
@@ -905,6 +993,7 @@ xfs_log_commit_cil(
{
struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
+ xfs_lsn_t xc_commit_lsn;
/*
* Do all necessary memory allocation before we lock the CIL.
@@ -918,13 +1007,9 @@ xfs_log_commit_cil(
xlog_cil_insert_items(log, tp);
- /* check we didn't blow the reservation */
- if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(mp, tp->t_ticket);
-
- tp->t_commit_lsn = cil->xc_ctx->sequence;
+ xc_commit_lsn = cil->xc_ctx->sequence;
if (commit_lsn)
- *commit_lsn = tp->t_commit_lsn;
+ *commit_lsn = xc_commit_lsn;
xfs_log_done(mp, tp->t_ticket, NULL, regrant);
xfs_trans_unreserve_and_mod_sb(tp);
@@ -940,7 +1025,7 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, tp->t_commit_lsn, false);
+ xfs_trans_free_items(tp, xc_commit_lsn, false);
xlog_cil_push_background(log);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 2b6eec52178e..51bf7b827387 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -257,6 +257,7 @@ struct xfs_cil_ctx {
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
struct xfs_log_callback log_cb; /* completion callback hook. */
struct list_head committing; /* ctx committing list */
+ struct work_struct discard_endio_work;
};
/*
@@ -418,7 +419,7 @@ struct xlog {
};
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
- ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
@@ -455,6 +456,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
}
void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+void xlog_print_trans(struct xfs_trans *);
int
xlog_write(
struct xlog *log,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 4a98762ec8b4..9549188f5a36 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -352,13 +352,13 @@ xlog_header_check_mount(
{
ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
- if (uuid_is_nil(&head->h_fs_uuid)) {
+ if (uuid_is_null(&head->h_fs_uuid)) {
/*
* IRIX doesn't write the h_fs_uuid or h_fmt fields. If
- * h_fs_uuid is nil, we assume this log was last mounted
+ * h_fs_uuid is null, we assume this log was last mounted
* by IRIX and continue.
*/
- xfs_warn(mp, "nil uuid in log - IRIX style log");
+ xfs_warn(mp, "null uuid in log - IRIX style log");
} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
xfs_warn(mp, "log has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
@@ -2230,9 +2230,9 @@ xlog_recover_get_buf_lsn(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
- __uint32_t magic32;
- __uint16_t magic16;
- __uint16_t magicda;
+ uint32_t magic32;
+ uint16_t magic16;
+ uint16_t magicda;
void *blk = bp->b_addr;
uuid_t *uuid;
xfs_lsn_t lsn = -1;
@@ -2381,9 +2381,9 @@ xlog_recover_validate_buf_type(
xfs_lsn_t current_lsn)
{
struct xfs_da_blkinfo *info = bp->b_addr;
- __uint32_t magic32;
- __uint16_t magic16;
- __uint16_t magicda;
+ uint32_t magic32;
+ uint16_t magic16;
+ uint16_t magicda;
char *warnmsg = NULL;
/*
@@ -2852,7 +2852,7 @@ xlog_recover_buffer_pass2(
if (XFS_DINODE_MAGIC ==
be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
(BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize,
- (__uint32_t)log->l_mp->m_inode_cluster_size))) {
+ (uint32_t)log->l_mp->m_inode_cluster_size))) {
xfs_buf_stale(bp);
error = xfs_bwrite(bp);
} else {
@@ -3423,7 +3423,7 @@ xlog_recover_efd_pass2(
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
xfs_log_item_t *lip;
- __uint64_t efi_id;
+ uint64_t efi_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3519,7 +3519,7 @@ xlog_recover_rud_pass2(
struct xfs_rud_log_format *rud_formatp;
struct xfs_rui_log_item *ruip = NULL;
struct xfs_log_item *lip;
- __uint64_t rui_id;
+ uint64_t rui_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3635,7 +3635,7 @@ xlog_recover_cud_pass2(
struct xfs_cud_log_format *cud_formatp;
struct xfs_cui_log_item *cuip = NULL;
struct xfs_log_item *lip;
- __uint64_t cui_id;
+ uint64_t cui_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3754,7 +3754,7 @@ xlog_recover_bud_pass2(
struct xfs_bud_log_format *bud_formatp;
struct xfs_bui_log_item *buip = NULL;
struct xfs_log_item *lip;
- __uint64_t bui_id;
+ uint64_t bui_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3796,7 +3796,7 @@ xlog_recover_bud_pass2(
* This routine is called when an inode create format structure is found in a
* committed transaction in the log. It's purpose is to initialise the inodes
* being allocated on disk. This requires us to get inode cluster buffers that
- * match the range to be intialised, stamped with inode templates and written
+ * match the range to be initialised, stamped with inode templates and written
* by delayed write so that subsequent modifications will hit the cached buffer
* and only need writing out at the end of recovery.
*/
@@ -4152,7 +4152,7 @@ xlog_recover_commit_trans(
#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
- hlist_del(&trans->r_list);
+ hlist_del_init(&trans->r_list);
error = xlog_recover_reorder_trans(log, trans, pass);
if (error)
@@ -4354,6 +4354,8 @@ xlog_recover_free_trans(
xlog_recover_item_t *item, *n;
int i;
+ hlist_del_init(&trans->r_list);
+
list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
/* Free the regions in the item. */
list_del(&item->ri_list);
@@ -5224,12 +5226,16 @@ xlog_do_recovery_pass(
int error2 = 0;
int bblks, split_bblks;
int hblks, split_hblks, wrapped_hblks;
+ int i;
struct hlist_head rhash[XLOG_RHASH_SIZE];
LIST_HEAD (buffer_list);
ASSERT(head_blk != tail_blk);
rhead_blk = 0;
+ for (i = 0; i < XLOG_RHASH_SIZE; i++)
+ INIT_HLIST_HEAD(&rhash[i]);
+
/*
* Read the header of the tail block and get the iclog buffer size from
* h_size. Use this to tell how many sectors make up the log header.
@@ -5466,6 +5472,19 @@ xlog_do_recovery_pass(
if (error && first_bad)
*first_bad = rhead_blk;
+ /*
+ * Transactions are freed at commit time but transactions without commit
+ * records on disk are never committed. Free any that may be left in the
+ * hash table.
+ */
+ for (i = 0; i < XLOG_RHASH_SIZE; i++) {
+ struct hlist_node *tmp;
+ struct xlog_recover *trans;
+
+ hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list)
+ xlog_recover_free_trans(trans);
+ }
+
return error ? error : error2;
}
@@ -5772,9 +5791,9 @@ xlog_recover_check_summary(
xfs_buf_t *agfbp;
xfs_buf_t *agibp;
xfs_agnumber_t agno;
- __uint64_t freeblks;
- __uint64_t itotal;
- __uint64_t ifree;
+ uint64_t freeblks;
+ uint64_t itotal;
+ uint64_t ifree;
int error;
mp = log->l_mp;
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 11792d888e4e..e68bd1050eab 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -110,7 +110,10 @@ assfail(char *expr, char *file, int line)
{
xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
expr, file, line);
- BUG();
+ if (xfs_globals.bug_on_assert)
+ BUG();
+ else
+ WARN_ON(1);
}
void
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9b9540db17a6..40d4e8b4e193 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -45,6 +45,7 @@
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
+#include "xfs_extent_busy.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -72,17 +73,20 @@ xfs_uuid_mount(
uuid_t *uuid = &mp->m_sb.sb_uuid;
int hole, i;
+ /* Publish UUID in struct super_block */
+ uuid_copy(&mp->m_super->s_uuid, uuid);
+
if (mp->m_flags & XFS_MOUNT_NOUUID)
return 0;
- if (uuid_is_nil(uuid)) {
- xfs_warn(mp, "Filesystem has nil UUID - can't mount");
+ if (uuid_is_null(uuid)) {
+ xfs_warn(mp, "Filesystem has null UUID - can't mount");
return -EINVAL;
}
mutex_lock(&xfs_uuid_table_mutex);
for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
- if (uuid_is_nil(&xfs_uuid_table[i])) {
+ if (uuid_is_null(&xfs_uuid_table[i])) {
hole = i;
continue;
}
@@ -119,7 +123,7 @@ xfs_uuid_unmount(
mutex_lock(&xfs_uuid_table_mutex);
for (i = 0; i < xfs_uuid_table_size; i++) {
- if (uuid_is_nil(&xfs_uuid_table[i]))
+ if (uuid_is_null(&xfs_uuid_table[i]))
continue;
if (!uuid_equal(uuid, &xfs_uuid_table[i]))
continue;
@@ -169,7 +173,7 @@ xfs_free_perag(
int
xfs_sb_validate_fsb_count(
xfs_sb_t *sbp,
- __uint64_t nblocks)
+ uint64_t nblocks)
{
ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
ASSERT(sbp->sb_blocklog >= BBSHIFT);
@@ -187,7 +191,7 @@ xfs_initialize_perag(
xfs_agnumber_t *maxagi)
{
xfs_agnumber_t index;
- xfs_agnumber_t first_initialised = 0;
+ xfs_agnumber_t first_initialised = NULLAGNUMBER;
xfs_perag_t *pag;
int error = -ENOMEM;
@@ -202,22 +206,21 @@ xfs_initialize_perag(
xfs_perag_put(pag);
continue;
}
- if (!first_initialised)
- first_initialised = index;
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
if (!pag)
- goto out_unwind;
+ goto out_unwind_new_pags;
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
- goto out_unwind;
+ goto out_free_pag;
+ init_waitqueue_head(&pag->pagb_wait);
if (radix_tree_preload(GFP_NOFS))
- goto out_unwind;
+ goto out_hash_destroy;
spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
@@ -225,10 +228,13 @@ xfs_initialize_perag(
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
error = -EEXIST;
- goto out_unwind;
+ goto out_hash_destroy;
}
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
+ /* first new pag is fully initialized */
+ if (first_initialised == NULLAGNUMBER)
+ first_initialised = index;
}
index = xfs_set_inode_alloc(mp, agcount);
@@ -239,11 +245,16 @@ xfs_initialize_perag(
mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
return 0;
-out_unwind:
+out_hash_destroy:
xfs_buf_hash_destroy(pag);
+out_free_pag:
kmem_free(pag);
- for (; index > first_initialised; index--) {
+out_unwind_new_pags:
+ /* unwind any prior newly initialized pags */
+ for (index = first_initialised; index < agcount; index++) {
pag = radix_tree_delete(&mp->m_perag_tree, index);
+ if (!pag)
+ break;
xfs_buf_hash_destroy(pag);
kmem_free(pag);
}
@@ -424,7 +435,7 @@ STATIC void
xfs_set_maxicount(xfs_mount_t *mp)
{
xfs_sb_t *sbp = &(mp->m_sb);
- __uint64_t icount;
+ uint64_t icount;
if (sbp->sb_imax_pct) {
/*
@@ -490,7 +501,7 @@ xfs_set_low_space_thresholds(
int i;
for (i = 0; i < XFS_LOWSP_MAX; i++) {
- __uint64_t space = mp->m_sb.sb_dblocks;
+ uint64_t space = mp->m_sb.sb_dblocks;
do_div(space, 100);
mp->m_low_space[i] = space * (i + 1);
@@ -505,8 +516,7 @@ STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
else
mp->m_inoalign_mask = 0;
@@ -587,10 +597,10 @@ xfs_mount_reset_sbqflags(
return xfs_sync_sb(mp, false);
}
-__uint64_t
+uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
- __uint64_t resblks;
+ uint64_t resblks;
/*
* We default to 5% or 8192 fsbs of space reserved, whichever is
@@ -601,7 +611,7 @@ xfs_default_resblks(xfs_mount_t *mp)
*/
resblks = mp->m_sb.sb_dblocks;
do_div(resblks, 20);
- resblks = min_t(__uint64_t, resblks, 8192);
+ resblks = min_t(uint64_t, resblks, 8192);
return resblks;
}
@@ -621,7 +631,7 @@ xfs_mountfs(
{
struct xfs_sb *sbp = &(mp->m_sb);
struct xfs_inode *rip;
- __uint64_t resblks;
+ uint64_t resblks;
uint quotamount = 0;
uint quotaflags = 0;
int error = 0;
@@ -709,10 +719,13 @@ xfs_mountfs(
if (error)
goto out_del_stats;
+ error = xfs_errortag_init(mp);
+ if (error)
+ goto out_remove_error_sysfs;
error = xfs_uuid_mount(mp);
if (error)
- goto out_remove_error_sysfs;
+ goto out_remove_errortag;
/*
* Set the minimum read and write sizes
@@ -782,7 +795,10 @@ xfs_mountfs(
* Copies the low order bits of the timestamp and the randomly
* set "sequence" number out of a UUID.
*/
- uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
+ mp->m_fixedfsid[0] =
+ (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
+ get_unaligned_be16(&sbp->sb_uuid.b[4]);
+ mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
mp->m_dmevmask = 0; /* not persistent; set after each mount */
@@ -1031,6 +1047,8 @@ xfs_mountfs(
xfs_da_unmount(mp);
out_remove_uuid:
xfs_uuid_unmount(mp);
+ out_remove_errortag:
+ xfs_errortag_del(mp);
out_remove_error_sysfs:
xfs_error_sysfs_del(mp);
out_del_stats:
@@ -1049,7 +1067,7 @@ void
xfs_unmountfs(
struct xfs_mount *mp)
{
- __uint64_t resblks;
+ uint64_t resblks;
int error;
cancel_delayed_work_sync(&mp->m_eofblocks_work);
@@ -1073,6 +1091,13 @@ xfs_unmountfs(
xfs_log_force(mp, XFS_LOG_SYNC);
/*
+ * Wait for all busy extents to be freed, including completion of
+ * any discard operation.
+ */
+ xfs_extent_busy_wait_all(mp);
+ flush_workqueue(xfs_discard_wq);
+
+ /*
* We now need to tell the world we are unmounting. This will allow
* us to detect that the filesystem is going away and we should error
* out anything that we have been retrying in the background. This will
@@ -1127,10 +1152,11 @@ xfs_unmountfs(
xfs_uuid_unmount(mp);
#if defined(DEBUG)
- xfs_errortag_clearall(mp, 0);
+ xfs_errortag_clearall(mp);
#endif
xfs_free_perag(mp);
+ xfs_errortag_del(mp);
xfs_error_sysfs_del(mp);
xfs_sysfs_del(&mp->m_stats.xs_kobj);
xfs_sysfs_del(&mp->m_kobj);
@@ -1191,7 +1217,7 @@ xfs_mod_icount(
struct xfs_mount *mp,
int64_t delta)
{
- __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+ percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
ASSERT(0);
percpu_counter_add(&mp->m_icount, -delta);
@@ -1270,7 +1296,7 @@ xfs_mod_fdblocks(
else
batch = XFS_FDBLOCKS_BATCH;
- __percpu_counter_add(&mp->m_fdblocks, delta, batch);
+ percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
XFS_FDBLOCKS_BATCH) >= 0) {
/* we had space! */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7f351f706b7a..e0792d036be2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -108,10 +108,10 @@ typedef struct xfs_mount {
xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
- __uint8_t m_blkbit_log; /* blocklog + NBBY */
- __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
- __uint8_t m_agno_log; /* log #ag's */
- __uint8_t m_agino_log; /* #bits for agino in inum */
+ uint8_t m_blkbit_log; /* blocklog + NBBY */
+ uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
+ uint8_t m_agno_log; /* log #ag's */
+ uint8_t m_agino_log; /* #bits for agino in inum */
uint m_inode_cluster_size;/* min inode buf size */
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
@@ -139,7 +139,7 @@ typedef struct xfs_mount {
struct mutex m_growlock; /* growfs mutex */
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_dmevmask; /* DMI events for this FS */
- __uint64_t m_flags; /* global mount flags */
+ uint64_t m_flags; /* global mount flags */
bool m_inotbt_nores; /* no per-AG finobt resv. */
int m_ialloc_inos; /* inodes in inode allocation */
int m_ialloc_blks; /* blocks in inode allocation */
@@ -148,14 +148,14 @@ typedef struct xfs_mount {
int m_inoalign_mask;/* mask sb_inoalignmt if used */
uint m_qflags; /* quota status flags */
struct xfs_trans_resv m_resv; /* precomputed res values */
- __uint64_t m_maxicount; /* maximum inode count */
- __uint64_t m_resblks; /* total reserved blocks */
- __uint64_t m_resblks_avail;/* available reserved blocks */
- __uint64_t m_resblks_save; /* reserved blks @ remount,ro */
+ uint64_t m_maxicount; /* maximum inode count */
+ uint64_t m_resblks; /* total reserved blocks */
+ uint64_t m_resblks_avail;/* available reserved blocks */
+ uint64_t m_resblks_save; /* reserved blks @ remount,ro */
int m_dalign; /* stripe unit */
int m_swidth; /* stripe width */
int m_sinoalign; /* stripe unit inode alignment */
- __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
+ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
@@ -183,6 +183,7 @@ typedef struct xfs_mount {
struct workqueue_struct *m_reclaim_workqueue;
struct workqueue_struct *m_log_workqueue;
struct workqueue_struct *m_eofblocks_workqueue;
+ struct workqueue_struct *m_sync_workqueue;
/*
* Generation of the filesysyem layout. This is incremented by each
@@ -193,18 +194,17 @@ typedef struct xfs_mount {
* ever support shrinks it would have to be persisted in addition
* to various other kinds of pain inflicted on the pNFS server.
*/
- __uint32_t m_generation;
+ uint32_t m_generation;
bool m_fail_unmount;
#ifdef DEBUG
/*
- * DEBUG mode instrumentation to test and/or trigger delayed allocation
- * block killing in the event of failed writes. When enabled, all
- * buffered writes are forced to fail. All delalloc blocks in the range
- * of the write (including pre-existing delalloc blocks!) are tossed as
- * part of the write failure error handling sequence.
+ * Frequency with which errors are injected. Replaces xfs_etest; the
+ * value stored in here is the inverse of the frequency with which the
+ * error triggers. 1 = always, 2 = half the time, etc.
*/
- bool m_fail_writes;
+ unsigned int *m_errortag;
+ struct xfs_kobj m_errortag_kobj;
#endif
} xfs_mount_t;
@@ -311,7 +311,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
static inline xfs_agnumber_t
xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
{
- xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
+ xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
do_div(ld, mp->m_sb.sb_agblocks);
return (xfs_agnumber_t) ld;
}
@@ -319,24 +319,10 @@ xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
static inline xfs_agblock_t
xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
{
- xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
+ xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
-#ifdef DEBUG
-static inline bool
-xfs_mp_fail_writes(struct xfs_mount *mp)
-{
- return mp->m_fail_writes;
-}
-#else
-static inline bool
-xfs_mp_fail_writes(struct xfs_mount *mp)
-{
- return 0;
-}
-#endif
-
/* per-AG block reservation data structures*/
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
@@ -365,12 +351,12 @@ typedef struct xfs_perag {
char pagi_init; /* this agi's entry is initialized */
char pagf_metadata; /* the agf is preferred to be metadata */
char pagi_inodeok; /* The agi is ok for inodes */
- __uint8_t pagf_levels[XFS_BTNUM_AGF];
+ uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */
- __uint32_t pagf_flcount; /* count of blocks in freelist */
+ uint32_t pagf_flcount; /* count of blocks in freelist */
xfs_extlen_t pagf_freeblks; /* total free blocks */
xfs_extlen_t pagf_longest; /* longest free space */
- __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
+ uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
xfs_agino_t pagi_freecount; /* number of free inodes */
xfs_agino_t pagi_count; /* number of allocated inodes */
@@ -384,6 +370,8 @@ typedef struct xfs_perag {
xfs_agino_t pagl_rightrec;
spinlock_t pagb_lock; /* lock for pagb_tree */
struct rb_root pagb_tree; /* ordered tree of busy extents */
+ unsigned int pagb_gen; /* generation count for pagb_tree */
+ wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
@@ -407,7 +395,7 @@ typedef struct xfs_perag {
struct xfs_ag_resv pag_agfl_resv;
/* reference count */
- __uint8_t pagf_refcount_level;
+ uint8_t pagf_refcount_level;
} xfs_perag_t;
static inline struct xfs_ag_resv *
@@ -430,7 +418,7 @@ void xfs_buf_hash_destroy(xfs_perag_t *pag);
extern void xfs_uuid_table_free(void);
extern int xfs_log_sbcount(xfs_mount_t *);
-extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
+extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
xfs_agnumber_t *maxagi);
@@ -446,7 +434,7 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
-extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
+extern int xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t);
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b669b123287b..6ce948c436d5 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -851,8 +851,8 @@ xfs_qm_reset_dqcounts(
* started afresh by xfs_qm_quotacheck.
*/
#ifdef DEBUG
- j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
- do_div(j, sizeof(xfs_dqblk_t));
+ j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) /
+ sizeof(xfs_dqblk_t);
ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
#endif
dqb = bp->b_addr;
@@ -1247,6 +1247,7 @@ xfs_qm_flush_one(
struct xfs_dquot *dqp,
void *data)
{
+ struct xfs_mount *mp = dqp->q_mount;
struct list_head *buffer_list = data;
struct xfs_buf *bp = NULL;
int error = 0;
@@ -1257,7 +1258,32 @@ xfs_qm_flush_one(
if (!XFS_DQ_IS_DIRTY(dqp))
goto out_unlock;
- xfs_dqflock(dqp);
+ /*
+ * The only way the dquot is already flush locked by the time quotacheck
+ * gets here is if reclaim flushed it before the dqadjust walk dirtied
+ * it for the final time. Quotacheck collects all dquot bufs in the
+ * local delwri queue before dquots are dirtied, so reclaim can't have
+ * possibly queued it for I/O. The only way out is to push the buffer to
+ * cycle the flush lock.
+ */
+ if (!xfs_dqflock_nowait(dqp)) {
+ /* buf is pinned in-core by delwri list */
+ DEFINE_SINGLE_BUF_MAP(map, dqp->q_blkno,
+ mp->m_quotainfo->qi_dqchunklen);
+ bp = _xfs_buf_find(mp->m_ddev_targp, &map, 1, 0, NULL);
+ if (!bp) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+ xfs_buf_unlock(bp);
+
+ xfs_buf_delwri_pushbuf(bp, buffer_list);
+ xfs_buf_rele(bp);
+
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
error = xfs_qm_dqflush(dqp, &bp);
if (error)
goto out_unlock;
@@ -1384,12 +1410,7 @@ xfs_qm_quotacheck(
mp->m_qflags |= flags;
error_return:
- while (!list_empty(&buffer_list)) {
- struct xfs_buf *bp =
- list_first_entry(&buffer_list, struct xfs_buf, b_list);
- list_del_init(&bp->b_list);
- xfs_buf_relse(bp);
- }
+ xfs_buf_delwri_cancel(&buffer_list);
if (error) {
xfs_warn(mp,
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 3e52d5de7ae1..2be6d2735ca9 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -33,7 +33,7 @@ xfs_fill_statvfs_from_dquot(
struct kstatfs *statp,
struct xfs_dquot *dqp)
{
- __uint64_t limit;
+ uint64_t limit;
limit = dqp->q_core.d_blk_softlimit ?
be64_to_cpu(dqp->q_core.d_blk_softlimit) :
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 475a3882a81f..9cb5c381b01c 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -759,5 +759,6 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL);
+ xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL,
+ XFS_AGITER_INEW_WAIT);
}
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index f82d79a8c694..de9493253edf 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -269,7 +269,6 @@ xfs_fs_get_nextdqblk(
/* ID may be different, so convert back what we got */
*qid = make_kqid(current_user_ns(), qid->type, id);
return 0;
-
}
STATIC int
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 6e4c7446c3d4..96fe209b5eb6 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -221,6 +221,7 @@ void
xfs_cui_release(
struct xfs_cui_log_item *cuip)
{
+ ASSERT(atomic_read(&cuip->cui_refcount) > 0);
if (atomic_dec_and_test(&cuip->cui_refcount)) {
xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_cui_item_free(cuip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 07593a362cd0..ab2270a87196 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -82,11 +82,22 @@
* mappings are a reservation against the free space in the filesystem;
* adjacent mappings can also be combined into fewer larger mappings.
*
+ * As an optimization, the CoW extent size hint (cowextsz) creates
+ * outsized aligned delalloc reservations in the hope of landing out of
+ * order nearby CoW writes in a single extent on disk, thereby reducing
+ * fragmentation and improving future performance.
+ *
+ * D: --RRRRRRSSSRRRRRRRR--- (data fork)
+ * C: ------DDDDDDD--------- (CoW fork)
+ *
* When dirty pages are being written out (typically in writepage), the
- * delalloc reservations are converted into real mappings by allocating
- * blocks and replacing the delalloc mapping with real ones. A delalloc
- * mapping can be replaced by several real ones if the free space is
- * fragmented.
+ * delalloc reservations are converted into unwritten mappings by
+ * allocating blocks and replacing the delalloc mapping with real ones.
+ * A delalloc mapping can be replaced by several unwritten ones if the
+ * free space is fragmented.
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUUUUUU---------
*
* We want to adapt the delalloc mechanism for copy-on-write, since the
* write paths are similar. The first two steps (creating the reservation
@@ -101,13 +112,29 @@
* Block-aligned directio writes will use the same mechanism as buffered
* writes.
*
+ * Just prior to submitting the actual disk write requests, we convert
+ * the extents representing the range of the file actually being written
+ * (as opposed to extra pieces created for the cowextsize hint) to real
+ * extents. This will become important in the next step:
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUrrUUU---------
+ *
* CoW remapping must be done after the data block write completes,
* because we don't want to destroy the old data fork map until we're sure
* the new block has been written. Since the new mappings are kept in a
* separate fork, we can simply iterate these mappings to find the ones
* that cover the file blocks that we just CoW'd. For each extent, simply
* unmap the corresponding range in the data fork, map the new range into
- * the data fork, and remove the extent from the CoW fork.
+ * the data fork, and remove the extent from the CoW fork. Because of
+ * the presence of the cowextsize hint, however, we must be careful
+ * only to remap the blocks that we've actually written out -- we must
+ * never remap delalloc reservations nor CoW staging blocks that have
+ * yet to be written. This corresponds exactly to the real extents in
+ * the CoW fork:
+ *
+ * D: --RRRRRRrrSRRRRRRRR---
+ * C: ------UU--UUU---------
*
* Since the remapping operation can be applied to an arbitrary file
* range, we record the need for the remap step as a flag in the ioend
@@ -128,6 +155,7 @@
int
xfs_reflink_find_shared(
struct xfs_mount *mp,
+ struct xfs_trans *tp,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_extlen_t aglen,
@@ -139,18 +167,18 @@ xfs_reflink_find_shared(
struct xfs_btree_cur *cur;
int error;
- error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
if (error)
return error;
- cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+ cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
find_end_of_shared);
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
+ xfs_trans_brelse(tp, agbp);
return error;
}
@@ -179,11 +207,7 @@ xfs_reflink_trim_around_shared(
int error = 0;
/* Holes, unwritten, and delalloc extents cannot be shared */
- if (!xfs_is_reflink_inode(ip) ||
- ISUNWRITTEN(irec) ||
- irec->br_startblock == HOLESTARTBLOCK ||
- irec->br_startblock == DELAYSTARTBLOCK ||
- isnullstartblock(irec->br_startblock)) {
+ if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
*shared = false;
return 0;
}
@@ -194,7 +218,7 @@ xfs_reflink_trim_around_shared(
agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
aglen = irec->br_blockcount;
- error = xfs_reflink_find_shared(ip->i_mount, agno, agbno,
+ error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno,
aglen, &fbno, &flen, true);
if (error)
return error;
@@ -296,103 +320,165 @@ xfs_reflink_reserve_cow(
return 0;
}
-/* Allocate all CoW reservations covering a range of blocks in a file. */
-static int
-__xfs_reflink_allocate_cow(
- struct xfs_inode *ip,
- xfs_fileoff_t *offset_fsb,
- xfs_fileoff_t end_fsb)
+/* Convert part of an unwritten CoW extent to a real one. */
+STATIC int
+xfs_reflink_convert_cow_extent(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ xfs_fileoff_t offset_fsb,
+ xfs_filblks_t count_fsb,
+ struct xfs_defer_ops *dfops)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_bmbt_irec imap;
- struct xfs_defer_ops dfops;
- struct xfs_trans *tp;
- xfs_fsblock_t first_block;
- int nimaps = 1, error;
- bool shared;
+ xfs_fsblock_t first_block;
+ int nimaps = 1;
- xfs_defer_init(&dfops, &first_block);
-
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
- XFS_TRANS_RESERVE, &tp);
- if (error)
- return error;
+ if (imap->br_state == XFS_EXT_NORM)
+ return 0;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trim_extent(imap, offset_fsb, count_fsb);
+ trace_xfs_reflink_convert_cow(ip, imap);
+ if (imap->br_blockcount == 0)
+ return 0;
+ return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
+ 0, imap, &nimaps, dfops);
+}
- /* Read extent from the source file. */
- nimaps = 1;
- error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
- &imap, &nimaps, 0);
- if (error)
- goto out_unlock;
- ASSERT(nimaps == 1);
+/* Convert all of the unwritten CoW extents in a file's range to real ones. */
+int
+xfs_reflink_convert_cow(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t count)
+{
+ struct xfs_bmbt_irec got;
+ struct xfs_defer_ops dfops;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
+ xfs_extnum_t idx;
+ bool found;
+ int error = 0;
- error = xfs_reflink_reserve_cow(ip, &imap, &shared);
- if (error)
- goto out_trans_cancel;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (!shared) {
- *offset_fsb = imap.br_startoff + imap.br_blockcount;
- goto out_trans_cancel;
+ /* Convert all the extents to real from unwritten. */
+ for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
+ found && got.br_startoff < end_fsb;
+ found = xfs_iext_get_extent(ifp, ++idx, &got)) {
+ error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
+ end_fsb - offset_fsb, &dfops);
+ if (error)
+ break;
}
- xfs_trans_ijoin(tp, ip, 0);
- error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
- XFS_BMAPI_COWFORK, &first_block,
- XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
- &imap, &nimaps, &dfops);
- if (error)
- goto out_trans_cancel;
-
- error = xfs_defer_finish(&tp, &dfops, NULL);
- if (error)
- goto out_trans_cancel;
-
- error = xfs_trans_commit(tp);
-
- *offset_fsb = imap.br_startoff + imap.br_blockcount;
-out_unlock:
+ /* Finish up. */
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
-out_trans_cancel:
- xfs_defer_cancel(&dfops);
- xfs_trans_cancel(tp);
- goto out_unlock;
}
-/* Allocate all CoW reservations covering a part of a file. */
+/* Allocate all CoW reservations covering a range of blocks in a file. */
int
-xfs_reflink_allocate_cow_range(
+xfs_reflink_allocate_cow(
struct xfs_inode *ip,
- xfs_off_t offset,
- xfs_off_t count)
+ struct xfs_bmbt_irec *imap,
+ bool *shared,
+ uint *lockmode)
{
struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
- xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
- int error;
+ xfs_fileoff_t offset_fsb = imap->br_startoff;
+ xfs_filblks_t count_fsb = imap->br_blockcount;
+ struct xfs_bmbt_irec got;
+ struct xfs_defer_ops dfops;
+ struct xfs_trans *tp = NULL;
+ xfs_fsblock_t first_block;
+ int nimaps, error = 0;
+ bool trimmed;
+ xfs_filblks_t resaligned;
+ xfs_extlen_t resblks = 0;
+ xfs_extnum_t idx;
+retry:
ASSERT(xfs_is_reflink_inode(ip));
-
- trace_xfs_reflink_allocate_cow_range(ip, offset, count);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
/*
- * Make sure that the dquots are there.
+ * Even if the extent is not shared we might have a preallocation for
+ * it in the COW fork. If so use it.
*/
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
+ if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
+ got.br_startoff <= offset_fsb) {
+ *shared = true;
- while (offset_fsb < end_fsb) {
- error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
- if (error) {
- trace_xfs_reflink_allocate_cow_range_error(ip, error,
- _RET_IP_);
- break;
+ /* If we have a real allocation in the COW fork we're done. */
+ if (!isnullstartblock(got.br_startblock)) {
+ xfs_trim_extent(&got, offset_fsb, count_fsb);
+ *imap = got;
+ goto convert;
}
+
+ xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
+ } else {
+ error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
+ if (error || !*shared)
+ goto out;
+ }
+
+ if (!tp) {
+ resaligned = xfs_aligned_fsb_count(imap->br_startoff,
+ imap->br_blockcount, xfs_get_cowextsz_hint(ip));
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+
+ xfs_iunlock(ip, *lockmode);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+ *lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, *lockmode);
+
+ if (error)
+ return error;
+
+ error = xfs_qm_dqattach_locked(ip, 0);
+ if (error)
+ goto out;
+ goto retry;
}
+ error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out;
+
+ xfs_trans_ijoin(tp, ip, 0);
+
+ xfs_defer_init(&dfops, &first_block);
+ nimaps = 1;
+
+ /* Allocate the entire reservation as unwritten blocks. */
+ error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
+ resblks, imap, &nimaps, &dfops);
+ if (error)
+ goto out_bmap_cancel;
+
+ /* Finish up. */
+ error = xfs_defer_finish(&tp, &dfops, NULL);
+ if (error)
+ goto out_bmap_cancel;
+
+ error = xfs_trans_commit(tp);
+ if (error)
+ return error;
+convert:
+ return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
+ &dfops);
+out_bmap_cancel:
+ xfs_defer_cancel(&dfops);
+ xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
+ XFS_QMOPT_RES_REGBLKS);
+out:
+ if (tp)
+ xfs_trans_cancel(tp);
return error;
}
@@ -459,14 +545,18 @@ xfs_reflink_trim_irec_to_next_cow(
}
/*
- * Cancel all pending CoW reservations for some block range of an inode.
+ * Cancel CoW reservations for some block range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_blocks(
struct xfs_inode *ip,
struct xfs_trans **tpp,
xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb)
+ xfs_fileoff_t end_fsb,
+ bool cancel_real)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, del;
@@ -490,7 +580,7 @@ xfs_reflink_cancel_cow_blocks(
&idx, &got, &del);
if (error)
break;
- } else {
+ } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
xfs_trans_ijoin(*tpp, ip, 0);
xfs_defer_init(&dfops, &firstfsb);
@@ -532,13 +622,17 @@ xfs_reflink_cancel_cow_blocks(
}
/*
- * Cancel all pending CoW reservations for some byte range of an inode.
+ * Cancel CoW reservations for some byte range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_range(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t count)
+ xfs_off_t count,
+ bool cancel_real)
{
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
@@ -564,7 +658,8 @@ xfs_reflink_cancel_cow_range(
xfs_trans_ijoin(tp, ip, 0);
/* Scrape out the old CoW reservations */
- error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
+ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
+ cancel_real);
if (error)
goto out_cancel;
@@ -611,8 +706,22 @@ xfs_reflink_end_cow(
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
- /* Start a rolling transaction to switch the mappings */
- resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
+ /*
+ * Start a rolling transaction to switch the mappings. We're
+ * unlikely ever to have to remap 16T worth of single-block
+ * extents, so just cap the worst case extent count to 2^32-1.
+ * Stick a warning in just in case, and avoid 64-bit division.
+ */
+ BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX);
+ if (end_fsb - offset_fsb > UINT_MAX) {
+ error = -EFSCORRUPTED;
+ xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(0);
+ goto out;
+ }
+ resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount,
+ (unsigned int)(end_fsb - offset_fsb),
+ XFS_DATA_FORK);
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
resblks, 0, 0, &tp);
if (error)
@@ -641,6 +750,16 @@ xfs_reflink_end_cow(
ASSERT(!isnullstartblock(got.br_startblock));
+ /*
+ * Don't remap unwritten extents; these are
+ * speculatively preallocated CoW extents that have been
+ * allocated but have not yet been involved in a write.
+ */
+ if (got.br_state == XFS_EXT_UNWRITTEN) {
+ idx--;
+ goto next_extent;
+ }
+
/* Unmap the old blocks in the data fork. */
xfs_defer_init(&dfops, &firstfsb);
rlen = del.br_blockcount;
@@ -855,13 +974,14 @@ STATIC int
xfs_reflink_update_dest(
struct xfs_inode *dest,
xfs_off_t newlen,
- xfs_extlen_t cowextsize)
+ xfs_extlen_t cowextsize,
+ bool is_dedupe)
{
struct xfs_mount *mp = dest->i_mount;
struct xfs_trans *tp;
int error;
- if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+ if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
return 0;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -882,6 +1002,10 @@ xfs_reflink_update_dest(
dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
}
+ if (!is_dedupe) {
+ xfs_trans_ichgtime(tp, dest,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ }
xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
error = xfs_trans_commit(tp);
@@ -932,12 +1056,12 @@ xfs_reflink_remap_extent(
xfs_off_t new_isize)
{
struct xfs_mount *mp = ip->i_mount;
+ bool real_extent = xfs_bmap_is_real_extent(irec);
struct xfs_trans *tp;
xfs_fsblock_t firstfsb;
unsigned int resblks;
struct xfs_defer_ops dfops;
struct xfs_bmbt_irec uirec;
- bool real_extent;
xfs_filblks_t rlen;
xfs_filblks_t unmap_len;
xfs_off_t newlen;
@@ -946,11 +1070,6 @@ xfs_reflink_remap_extent(
unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
trace_xfs_reflink_punch_range(ip, destoff, unmap_len);
- /* Only remap normal extents. */
- real_extent = (irec->br_startblock != HOLESTARTBLOCK &&
- irec->br_startblock != DELAYSTARTBLOCK &&
- !ISUNWRITTEN(irec));
-
/* No reflinking if we're low on space */
if (real_extent) {
error = xfs_reflink_ag_has_free_space(mp,
@@ -1195,7 +1314,8 @@ xfs_reflink_remap_range(
!(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
cowextsize = src->i_d.di_cowextsize;
- ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+ is_dedupe);
out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
@@ -1245,9 +1365,7 @@ xfs_reflink_dirty_extents(
goto out;
if (nmaps == 0)
break;
- if (map[0].br_startblock == HOLESTARTBLOCK ||
- map[0].br_startblock == DELAYSTARTBLOCK ||
- ISUNWRITTEN(&map[0]))
+ if (!xfs_bmap_is_real_extent(&map[0]))
goto next;
map[1] = map[0];
@@ -1256,8 +1374,8 @@ xfs_reflink_dirty_extents(
agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
aglen = map[1].br_blockcount;
- error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
- &rbno, &rlen, true);
+ error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+ aglen, &rbno, &rlen, true);
if (error)
goto out;
if (rbno == NULLAGBLOCK)
@@ -1288,64 +1406,78 @@ out:
return error;
}
-/* Clear the inode reflink flag if there are no shared extents. */
+/* Does this inode need the reflink flag? */
int
-xfs_reflink_clear_inode_flag(
- struct xfs_inode *ip,
- struct xfs_trans **tpp)
+xfs_reflink_inode_has_shared_extents(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ bool *has_shared)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t fbno;
- xfs_filblks_t end;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- xfs_extlen_t aglen;
- xfs_agblock_t rbno;
- xfs_extlen_t rlen;
- struct xfs_bmbt_irec map;
- int nmaps;
- int error = 0;
-
- ASSERT(xfs_is_reflink_inode(ip));
+ struct xfs_bmbt_irec got;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_extlen_t aglen;
+ xfs_agblock_t rbno;
+ xfs_extlen_t rlen;
+ xfs_extnum_t idx;
+ bool found;
+ int error;
- fbno = 0;
- end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
- while (end - fbno > 0) {
- nmaps = 1;
- /*
- * Look for extents in the file. Skip holes, delalloc, or
- * unwritten extents; they can't be reflinked.
- */
- error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0);
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
if (error)
return error;
- if (nmaps == 0)
- break;
- if (map.br_startblock == HOLESTARTBLOCK ||
- map.br_startblock == DELAYSTARTBLOCK ||
- ISUNWRITTEN(&map))
- goto next;
+ }
- agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
- agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
- aglen = map.br_blockcount;
+ *has_shared = false;
+ found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
+ while (found) {
+ if (isnullstartblock(got.br_startblock) ||
+ got.br_state != XFS_EXT_NORM)
+ goto next;
+ agno = XFS_FSB_TO_AGNO(mp, got.br_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock);
+ aglen = got.br_blockcount;
- error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
+ error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen,
&rbno, &rlen, false);
if (error)
return error;
/* Is there still a shared block here? */
- if (rbno != NULLAGBLOCK)
+ if (rbno != NULLAGBLOCK) {
+ *has_shared = true;
return 0;
+ }
next:
- fbno = map.br_startoff + map.br_blockcount;
+ found = xfs_iext_get_extent(ifp, ++idx, &got);
}
+ return 0;
+}
+
+/* Clear the inode reflink flag if there are no shared extents. */
+int
+xfs_reflink_clear_inode_flag(
+ struct xfs_inode *ip,
+ struct xfs_trans **tpp)
+{
+ bool needs_flag;
+ int error = 0;
+
+ ASSERT(xfs_is_reflink_inode(ip));
+
+ error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
+ if (error || needs_flag)
+ return error;
+
/*
* We didn't find any shared blocks so turn off the reflink flag.
* First, get rid of any leftover CoW mappings.
*/
- error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
if (error)
return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index aa6a4d64bd35..701487bab468 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -20,16 +20,18 @@
#ifndef __XFS_REFLINK_H
#define __XFS_REFLINK_H 1
-extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
- xfs_extlen_t *flen, bool find_maximal);
+extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
+ xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, bool *shared);
-extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
- xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
+extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
struct xfs_bmbt_irec *imap);
extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
@@ -37,14 +39,16 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb);
+ xfs_fileoff_t end_fsb, bool cancel_real);
extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t count);
+ xfs_off_t count, bool cancel_real);
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
+ struct xfs_inode *ip, bool *has_shared);
extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
struct xfs_trans **tpp);
extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 73c827831551..f3b139c9aa16 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -243,6 +243,7 @@ void
xfs_rui_release(
struct xfs_rui_log_item *ruip)
{
+ ASSERT(atomic_read(&ruip->rui_refcount) > 0);
if (atomic_dec_and_test(&ruip->rui_refcount)) {
xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_rui_item_free(ruip);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 802bcc326d9f..91472193643b 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1093,7 +1093,6 @@ xfs_rtallocate_extent(
xfs_extlen_t minlen, /* minimum length to allocate */
xfs_extlen_t maxlen, /* maximum length to allocate */
xfs_extlen_t *len, /* out: actual length allocated */
- xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */
int wasdel, /* was a delayed allocation extent */
xfs_extlen_t prod, /* extent product factor */
xfs_rtblock_t *rtblock) /* out: start block allocated */
@@ -1123,27 +1122,16 @@ xfs_rtallocate_extent(
}
}
+retry:
sumbp = NULL;
- /*
- * Allocate by size, or near another block, or exactly at some block.
- */
- switch (type) {
- case XFS_ALLOCTYPE_ANY_AG:
+ if (bno == 0) {
error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len,
&sumbp, &sb, prod, &r);
- break;
- case XFS_ALLOCTYPE_NEAR_BNO:
+ } else {
error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen,
len, &sumbp, &sb, prod, &r);
- break;
- case XFS_ALLOCTYPE_THIS_BNO:
- error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen,
- len, &sumbp, &sb, prod, &r);
- break;
- default:
- error = -EIO;
- ASSERT(0);
}
+
if (error)
return error;
@@ -1158,7 +1146,11 @@ xfs_rtallocate_extent(
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FREXTENTS, -slen);
else
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -slen);
+ } else if (prod > 1) {
+ prod = 1;
+ goto retry;
}
+
*rtblock = r;
return 0;
}
@@ -1264,13 +1256,13 @@ xfs_rtpick_extent(
{
xfs_rtblock_t b; /* result block */
int log2; /* log of sequence number */
- __uint64_t resid; /* residual after log removed */
- __uint64_t seq; /* sequence number of file creation */
- __uint64_t *seqp; /* pointer to seqno in inode */
+ uint64_t resid; /* residual after log removed */
+ uint64_t seq; /* sequence number of file creation */
+ uint64_t *seqp; /* pointer to seqno in inode */
ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
- seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
+ seqp = (uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
*seqp = 0;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 355dd9e1cb64..79defa722bf1 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -23,6 +23,16 @@
struct xfs_mount;
struct xfs_trans;
+struct xfs_rtalloc_rec {
+ xfs_rtblock_t ar_startblock;
+ xfs_rtblock_t ar_blockcount;
+};
+
+typedef int (*xfs_rtalloc_query_range_fn)(
+ struct xfs_trans *tp,
+ struct xfs_rtalloc_rec *rec,
+ void *priv);
+
#ifdef CONFIG_XFS_RT
/*
* Function prototypes for exported functions.
@@ -40,7 +50,6 @@ xfs_rtallocate_extent(
xfs_extlen_t minlen, /* minimum length to allocate */
xfs_extlen_t maxlen, /* maximum length to allocate */
xfs_extlen_t *len, /* out: actual length allocated */
- xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */
int wasdel, /* was a delayed allocation extent */
xfs_extlen_t prod, /* extent product factor */
xfs_rtblock_t *rtblock); /* out: start block allocated */
@@ -98,6 +107,8 @@ xfs_growfs_rt(
/*
* From xfs_rtbitmap.c
*/
+int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtblock_t start, xfs_extlen_t len, int val,
xfs_rtblock_t *new, int *stat);
@@ -119,13 +130,22 @@ int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtblock_t start, xfs_extlen_t len,
struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
-
-
+int xfs_rtalloc_query_range(struct xfs_trans *tp,
+ struct xfs_rtalloc_rec *low_rec,
+ struct xfs_rtalloc_rec *high_rec,
+ xfs_rtalloc_query_range_fn fn,
+ void *priv);
+int xfs_rtalloc_query_all(struct xfs_trans *tp,
+ xfs_rtalloc_query_range_fn fn,
+ void *priv);
#else
-# define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb) (ENOSYS)
+# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
# define xfs_growfs_rt(mp,in) (ENOSYS)
+# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS)
+# define xfs_rtalloc_query_all(t,f,p) (ENOSYS)
+# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f11282c96887..056e12b421eb 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -33,9 +33,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{
int i, j;
int len = 0;
- __uint64_t xs_xstrat_bytes = 0;
- __uint64_t xs_write_bytes = 0;
- __uint64_t xs_read_bytes = 0;
+ uint64_t xs_xstrat_bytes = 0;
+ uint64_t xs_write_bytes = 0;
+ uint64_t xs_read_bytes = 0;
static const struct xstats_entry {
char *desc;
@@ -100,7 +100,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
void xfs_stats_clearall(struct xfsstats __percpu *stats)
{
int c;
- __uint32_t vn_active;
+ uint32_t vn_active;
xfs_notice(NULL, "Clearing xfsstats");
for_each_possible_cpu(c) {
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 375840f5a99a..f64d0ae345c4 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -54,125 +54,125 @@ enum {
*/
struct __xfsstats {
# define XFSSTAT_END_EXTENT_ALLOC 4
- __uint32_t xs_allocx;
- __uint32_t xs_allocb;
- __uint32_t xs_freex;
- __uint32_t xs_freeb;
+ uint32_t xs_allocx;
+ uint32_t xs_allocb;
+ uint32_t xs_freex;
+ uint32_t xs_freeb;
# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4)
- __uint32_t xs_abt_lookup;
- __uint32_t xs_abt_compare;
- __uint32_t xs_abt_insrec;
- __uint32_t xs_abt_delrec;
+ uint32_t xs_abt_lookup;
+ uint32_t xs_abt_compare;
+ uint32_t xs_abt_insrec;
+ uint32_t xs_abt_delrec;
# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7)
- __uint32_t xs_blk_mapr;
- __uint32_t xs_blk_mapw;
- __uint32_t xs_blk_unmap;
- __uint32_t xs_add_exlist;
- __uint32_t xs_del_exlist;
- __uint32_t xs_look_exlist;
- __uint32_t xs_cmp_exlist;
+ uint32_t xs_blk_mapr;
+ uint32_t xs_blk_mapw;
+ uint32_t xs_blk_unmap;
+ uint32_t xs_add_exlist;
+ uint32_t xs_del_exlist;
+ uint32_t xs_look_exlist;
+ uint32_t xs_cmp_exlist;
# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4)
- __uint32_t xs_bmbt_lookup;
- __uint32_t xs_bmbt_compare;
- __uint32_t xs_bmbt_insrec;
- __uint32_t xs_bmbt_delrec;
+ uint32_t xs_bmbt_lookup;
+ uint32_t xs_bmbt_compare;
+ uint32_t xs_bmbt_insrec;
+ uint32_t xs_bmbt_delrec;
# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4)
- __uint32_t xs_dir_lookup;
- __uint32_t xs_dir_create;
- __uint32_t xs_dir_remove;
- __uint32_t xs_dir_getdents;
+ uint32_t xs_dir_lookup;
+ uint32_t xs_dir_create;
+ uint32_t xs_dir_remove;
+ uint32_t xs_dir_getdents;
# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3)
- __uint32_t xs_trans_sync;
- __uint32_t xs_trans_async;
- __uint32_t xs_trans_empty;
+ uint32_t xs_trans_sync;
+ uint32_t xs_trans_async;
+ uint32_t xs_trans_empty;
# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7)
- __uint32_t xs_ig_attempts;
- __uint32_t xs_ig_found;
- __uint32_t xs_ig_frecycle;
- __uint32_t xs_ig_missed;
- __uint32_t xs_ig_dup;
- __uint32_t xs_ig_reclaims;
- __uint32_t xs_ig_attrchg;
+ uint32_t xs_ig_attempts;
+ uint32_t xs_ig_found;
+ uint32_t xs_ig_frecycle;
+ uint32_t xs_ig_missed;
+ uint32_t xs_ig_dup;
+ uint32_t xs_ig_reclaims;
+ uint32_t xs_ig_attrchg;
# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5)
- __uint32_t xs_log_writes;
- __uint32_t xs_log_blocks;
- __uint32_t xs_log_noiclogs;
- __uint32_t xs_log_force;
- __uint32_t xs_log_force_sleep;
+ uint32_t xs_log_writes;
+ uint32_t xs_log_blocks;
+ uint32_t xs_log_noiclogs;
+ uint32_t xs_log_force;
+ uint32_t xs_log_force_sleep;
# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10)
- __uint32_t xs_try_logspace;
- __uint32_t xs_sleep_logspace;
- __uint32_t xs_push_ail;
- __uint32_t xs_push_ail_success;
- __uint32_t xs_push_ail_pushbuf;
- __uint32_t xs_push_ail_pinned;
- __uint32_t xs_push_ail_locked;
- __uint32_t xs_push_ail_flushing;
- __uint32_t xs_push_ail_restarts;
- __uint32_t xs_push_ail_flush;
+ uint32_t xs_try_logspace;
+ uint32_t xs_sleep_logspace;
+ uint32_t xs_push_ail;
+ uint32_t xs_push_ail_success;
+ uint32_t xs_push_ail_pushbuf;
+ uint32_t xs_push_ail_pinned;
+ uint32_t xs_push_ail_locked;
+ uint32_t xs_push_ail_flushing;
+ uint32_t xs_push_ail_restarts;
+ uint32_t xs_push_ail_flush;
# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2)
- __uint32_t xs_xstrat_quick;
- __uint32_t xs_xstrat_split;
+ uint32_t xs_xstrat_quick;
+ uint32_t xs_xstrat_split;
# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2)
- __uint32_t xs_write_calls;
- __uint32_t xs_read_calls;
+ uint32_t xs_write_calls;
+ uint32_t xs_read_calls;
# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4)
- __uint32_t xs_attr_get;
- __uint32_t xs_attr_set;
- __uint32_t xs_attr_remove;
- __uint32_t xs_attr_list;
+ uint32_t xs_attr_get;
+ uint32_t xs_attr_set;
+ uint32_t xs_attr_remove;
+ uint32_t xs_attr_list;
# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3)
- __uint32_t xs_iflush_count;
- __uint32_t xs_icluster_flushcnt;
- __uint32_t xs_icluster_flushinode;
+ uint32_t xs_iflush_count;
+ uint32_t xs_icluster_flushcnt;
+ uint32_t xs_icluster_flushinode;
# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8)
- __uint32_t vn_active; /* # vnodes not on free lists */
- __uint32_t vn_alloc; /* # times vn_alloc called */
- __uint32_t vn_get; /* # times vn_get called */
- __uint32_t vn_hold; /* # times vn_hold called */
- __uint32_t vn_rele; /* # times vn_rele called */
- __uint32_t vn_reclaim; /* # times vn_reclaim called */
- __uint32_t vn_remove; /* # times vn_remove called */
- __uint32_t vn_free; /* # times vn_free called */
+ uint32_t vn_active; /* # vnodes not on free lists */
+ uint32_t vn_alloc; /* # times vn_alloc called */
+ uint32_t vn_get; /* # times vn_get called */
+ uint32_t vn_hold; /* # times vn_hold called */
+ uint32_t vn_rele; /* # times vn_rele called */
+ uint32_t vn_reclaim; /* # times vn_reclaim called */
+ uint32_t vn_remove; /* # times vn_remove called */
+ uint32_t vn_free; /* # times vn_free called */
#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
- __uint32_t xb_get;
- __uint32_t xb_create;
- __uint32_t xb_get_locked;
- __uint32_t xb_get_locked_waited;
- __uint32_t xb_busy_locked;
- __uint32_t xb_miss_locked;
- __uint32_t xb_page_retries;
- __uint32_t xb_page_found;
- __uint32_t xb_get_read;
+ uint32_t xb_get;
+ uint32_t xb_create;
+ uint32_t xb_get_locked;
+ uint32_t xb_get_locked_waited;
+ uint32_t xb_busy_locked;
+ uint32_t xb_miss_locked;
+ uint32_t xb_page_retries;
+ uint32_t xb_page_found;
+ uint32_t xb_get_read;
/* Version 2 btree counters */
#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF + __XBTS_MAX)
- __uint32_t xs_abtb_2[__XBTS_MAX];
+ uint32_t xs_abtb_2[__XBTS_MAX];
#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
- __uint32_t xs_abtc_2[__XBTS_MAX];
+ uint32_t xs_abtc_2[__XBTS_MAX];
#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
- __uint32_t xs_bmbt_2[__XBTS_MAX];
+ uint32_t xs_bmbt_2[__XBTS_MAX];
#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
- __uint32_t xs_ibt_2[__XBTS_MAX];
+ uint32_t xs_ibt_2[__XBTS_MAX];
#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2 + __XBTS_MAX)
- __uint32_t xs_fibt_2[__XBTS_MAX];
+ uint32_t xs_fibt_2[__XBTS_MAX];
#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
- __uint32_t xs_rmap_2[__XBTS_MAX];
+ uint32_t xs_rmap_2[__XBTS_MAX];
#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
- __uint32_t xs_refcbt_2[__XBTS_MAX];
+ uint32_t xs_refcbt_2[__XBTS_MAX];
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6)
- __uint32_t xs_qm_dqreclaims;
- __uint32_t xs_qm_dqreclaim_misses;
- __uint32_t xs_qm_dquot_dups;
- __uint32_t xs_qm_dqcachemisses;
- __uint32_t xs_qm_dqcachehits;
- __uint32_t xs_qm_dqwants;
+ uint32_t xs_qm_dqreclaims;
+ uint32_t xs_qm_dqreclaim_misses;
+ uint32_t xs_qm_dquot_dups;
+ uint32_t xs_qm_dqcachemisses;
+ uint32_t xs_qm_dqcachehits;
+ uint32_t xs_qm_dqwants;
#define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2)
- __uint32_t xs_qm_dquot;
- __uint32_t xs_qm_dquot_unused;
+ uint32_t xs_qm_dquot;
+ uint32_t xs_qm_dquot_unused;
/* Extra precision counters */
- __uint64_t xs_xstrat_bytes;
- __uint64_t xs_write_bytes;
- __uint64_t xs_read_bytes;
+ uint64_t xs_xstrat_bytes;
+ uint64_t xs_write_bytes;
+ uint64_t xs_read_bytes;
};
struct xfsstats {
@@ -186,7 +186,7 @@ struct xfsstats {
* simple wrapper for getting the array index of s struct member offset
*/
#define XFS_STATS_CALC_INDEX(member) \
- (offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t))
+ (offsetof(struct __xfsstats, member) / (int)sizeof(uint32_t))
int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index eecbaac08eba..38aaacdbb8b3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -52,6 +52,7 @@
#include "xfs_reflink.h"
#include <linux/namei.h>
+#include <linux/dax.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mount.h>
@@ -195,7 +196,7 @@ xfs_parseargs(
int dsunit = 0;
int dswidth = 0;
int iosize = 0;
- __uint8_t iosizelog = 0;
+ uint8_t iosizelog = 0;
/*
* set up the mount name first so all the errors will refer to the
@@ -555,7 +556,7 @@ xfs_showargs(
return 0;
}
-static __uint64_t
+static uint64_t
xfs_max_file_offset(
unsigned int blockshift)
{
@@ -586,7 +587,7 @@ xfs_max_file_offset(
# endif
#endif
- return (((__uint64_t)pagefactor) << bitshift) - 1;
+ return (((uint64_t)pagefactor) << bitshift) - 1;
}
/*
@@ -621,7 +622,7 @@ xfs_set_inode_alloc(
* the max inode percentage. Used only for inode32.
*/
if (mp->m_maxicount) {
- __uint64_t icount;
+ uint64_t icount;
icount = sbp->sb_dblocks * sbp->sb_imax_pct;
do_div(icount, 100);
@@ -877,8 +878,15 @@ xfs_init_mount_workqueues(
if (!mp->m_eofblocks_workqueue)
goto out_destroy_log;
+ mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
+ mp->m_fsname);
+ if (!mp->m_sync_workqueue)
+ goto out_destroy_eofb;
+
return 0;
+out_destroy_eofb:
+ destroy_workqueue(mp->m_eofblocks_workqueue);
out_destroy_log:
destroy_workqueue(mp->m_log_workqueue);
out_destroy_reclaim:
@@ -899,6 +907,7 @@ STATIC void
xfs_destroy_mount_workqueues(
struct xfs_mount *mp)
{
+ destroy_workqueue(mp->m_sync_workqueue);
destroy_workqueue(mp->m_eofblocks_workqueue);
destroy_workqueue(mp->m_log_workqueue);
destroy_workqueue(mp->m_reclaim_workqueue);
@@ -953,7 +962,7 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(ip->i_mount, vn_remove);
if (xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
xfs_warn(ip->i_mount,
"Error %d while evicting CoW blocks for inode %llu.",
@@ -1079,12 +1088,12 @@ xfs_fs_statfs(
struct xfs_mount *mp = XFS_M(dentry->d_sb);
xfs_sb_t *sbp = &mp->m_sb;
struct xfs_inode *ip = XFS_I(d_inode(dentry));
- __uint64_t fakeinos, id;
- __uint64_t icount;
- __uint64_t ifree;
- __uint64_t fdblocks;
+ uint64_t fakeinos, id;
+ uint64_t icount;
+ uint64_t ifree;
+ uint64_t fdblocks;
xfs_extlen_t lsize;
- __int64_t ffree;
+ int64_t ffree;
statp->f_type = XFS_SB_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
@@ -1107,7 +1116,7 @@ xfs_fs_statfs(
statp->f_bavail = statp->f_bfree;
fakeinos = statp->f_bfree << sbp->sb_inopblog;
- statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+ statp->f_files = MIN(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
if (mp->m_maxicount)
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
@@ -1120,7 +1129,7 @@ xfs_fs_statfs(
/* make sure statp->f_ffree does not underflow */
ffree = statp->f_files - (icount - ifree);
- statp->f_ffree = max_t(__int64_t, ffree, 0);
+ statp->f_ffree = max_t(int64_t, ffree, 0);
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
@@ -1133,7 +1142,7 @@ xfs_fs_statfs(
STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
- __uint64_t resblks = 0;
+ uint64_t resblks = 0;
mp->m_resblks_save = mp->m_resblks;
xfs_reserve_blocks(mp, &resblks, NULL);
@@ -1142,7 +1151,7 @@ xfs_save_resvblks(struct xfs_mount *mp)
STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
- __uint64_t resblks;
+ uint64_t resblks;
if (mp->m_resblks_save) {
resblks = mp->m_resblks_save;
@@ -1757,7 +1766,8 @@ STATIC int __init
xfs_init_zones(void)
{
xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
- offsetof(struct xfs_ioend, io_inline_bio));
+ offsetof(struct xfs_ioend, io_inline_bio),
+ BIOSET_NEED_BVECS);
if (!xfs_ioend_bioset)
goto out;
@@ -1956,12 +1966,20 @@ xfs_init_workqueues(void)
if (!xfs_alloc_wq)
return -ENOMEM;
+ xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
+ if (!xfs_discard_wq)
+ goto out_free_alloc_wq;
+
return 0;
+out_free_alloc_wq:
+ destroy_workqueue(xfs_alloc_wq);
+ return -ENOMEM;
}
STATIC void
xfs_destroy_workqueues(void)
{
+ destroy_workqueue(xfs_discard_wq);
destroy_workqueue(xfs_alloc_wq);
}
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index b6418abd85ad..5f2f32408011 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -73,6 +73,8 @@ extern const struct quotactl_ops xfs_quotactl_operations;
extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
+extern struct workqueue_struct *xfs_discard_wq;
+
#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index f2cb45ed1d54..23a50d7aa46a 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -43,8 +43,8 @@
#include "xfs_log.h"
/* ----- Kernel only functions below ----- */
-STATIC int
-xfs_readlink_bmap(
+int
+xfs_readlink_bmap_ilocked(
struct xfs_inode *ip,
char *link)
{
@@ -61,6 +61,8 @@ xfs_readlink_bmap(
int fsblocks = 0;
int offset;
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+
fsblocks = xfs_symlink_blocks(mp, pathlen);
error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0);
if (error)
@@ -143,7 +145,7 @@ xfs_readlink(
if (!pathlen)
goto out;
- if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
__func__, (unsigned long long) ip->i_ino,
(long long) pathlen);
@@ -153,7 +155,7 @@ xfs_readlink(
}
- error = xfs_readlink_bmap(ip, link);
+ error = xfs_readlink_bmap_ilocked(ip, link);
out:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -202,7 +204,7 @@ xfs_symlink(
* Check component lengths of the target path name.
*/
pathlen = strlen(target_path);
- if (pathlen >= MAXPATHLEN) /* total string too long */
+ if (pathlen >= XFS_SYMLINK_MAXLEN) /* total string too long */
return -ENAMETOOLONG;
udqp = gdqp = NULL;
@@ -559,7 +561,7 @@ xfs_inactive_symlink(
return 0;
}
- if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
__func__, (unsigned long long)ip->i_ino, pathlen);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index e75245d09116..aeaee8923617 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -21,6 +21,7 @@
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
const char *target_path, umode_t mode, struct xfs_inode **ipp);
+int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link);
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_inactive_symlink(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 984a3499cfe3..82afee005140 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -95,6 +95,7 @@ extern xfs_param_t xfs_params;
struct xfs_globals {
int log_recovery_delay; /* log recovery delay (secs) */
+ bool bug_on_assert; /* BUG() the kernel on assert failure */
};
extern struct xfs_globals xfs_globals;
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index de6195e38910..8b2ccc234f36 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -90,15 +90,25 @@ to_mp(struct kobject *kobject)
return container_of(kobj, struct xfs_mount, m_kobj);
}
+static struct attribute *xfs_mp_attrs[] = {
+ NULL,
+};
+
+struct kobj_type xfs_mp_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_sysfs_ops,
+ .default_attrs = xfs_mp_attrs,
+};
+
#ifdef DEBUG
+/* debug */
STATIC ssize_t
-fail_writes_store(
+bug_on_assert_store(
struct kobject *kobject,
const char *buf,
size_t count)
{
- struct xfs_mount *mp = to_mp(kobject);
int ret;
int val;
@@ -107,9 +117,9 @@ fail_writes_store(
return ret;
if (val == 1)
- mp->m_fail_writes = true;
+ xfs_globals.bug_on_assert = true;
else if (val == 0)
- mp->m_fail_writes = false;
+ xfs_globals.bug_on_assert = false;
else
return -EINVAL;
@@ -117,33 +127,13 @@ fail_writes_store(
}
STATIC ssize_t
-fail_writes_show(
+bug_on_assert_show(
struct kobject *kobject,
char *buf)
{
- struct xfs_mount *mp = to_mp(kobject);
-
- return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0);
+ return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0);
}
-XFS_SYSFS_ATTR_RW(fail_writes);
-
-#endif /* DEBUG */
-
-static struct attribute *xfs_mp_attrs[] = {
-#ifdef DEBUG
- ATTR_LIST(fail_writes),
-#endif
- NULL,
-};
-
-struct kobj_type xfs_mp_ktype = {
- .release = xfs_sysfs_release,
- .sysfs_ops = &xfs_sysfs_ops,
- .default_attrs = xfs_mp_attrs,
-};
-
-#ifdef DEBUG
-/* debug */
+XFS_SYSFS_ATTR_RW(bug_on_assert);
STATIC ssize_t
log_recovery_delay_store(
@@ -176,6 +166,7 @@ log_recovery_delay_show(
XFS_SYSFS_ATTR_RW(log_recovery_delay);
static struct attribute *xfs_dbg_attrs[] = {
+ ATTR_LIST(bug_on_assert),
ATTR_LIST(log_recovery_delay),
NULL,
};
@@ -314,47 +305,11 @@ write_grant_head_show(
}
XFS_SYSFS_ATTR_RO(write_grant_head);
-#ifdef DEBUG
-STATIC ssize_t
-log_badcrc_factor_store(
- struct kobject *kobject,
- const char *buf,
- size_t count)
-{
- struct xlog *log = to_xlog(kobject);
- int ret;
- uint32_t val;
-
- ret = kstrtouint(buf, 0, &val);
- if (ret)
- return ret;
-
- log->l_badcrc_factor = val;
-
- return count;
-}
-
-STATIC ssize_t
-log_badcrc_factor_show(
- struct kobject *kobject,
- char *buf)
-{
- struct xlog *log = to_xlog(kobject);
-
- return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
-}
-
-XFS_SYSFS_ATTR_RW(log_badcrc_factor);
-#endif /* DEBUG */
-
static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(log_head_lsn),
ATTR_LIST(log_tail_lsn),
ATTR_LIST(reserve_grant_head),
ATTR_LIST(write_grant_head),
-#ifdef DEBUG
- ATTR_LIST(log_badcrc_factor),
-#endif
NULL,
};
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 7f17ae6d709a..5d95fe348294 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -47,6 +47,7 @@
#include "xfs_inode_item.h"
#include "xfs_bmap_btree.h"
#include "xfs_filestream.h"
+#include "xfs_fsmap.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 69c5bcd9a51b..bcc3cdf8e1c5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -40,6 +40,8 @@ struct xfs_inode_log_format;
struct xfs_bmbt_irec;
struct xfs_btree_cur;
struct xfs_refcount_irec;
+struct xfs_fsmap;
+struct xfs_rmap_irec;
DECLARE_EVENT_CLASS(xfs_attr_list_class,
TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -249,7 +251,7 @@ TRACE_EVENT(xfs_iext_insert,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
- (__int64_t)__entry->startblock,
+ (int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
@@ -293,7 +295,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
- (__int64_t)__entry->startblock,
+ (int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
@@ -365,6 +367,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
@@ -687,7 +690,7 @@ DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
DEFINE_INODE_EVENT(xfs_filemap_fault);
-DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
+DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
@@ -1278,7 +1281,7 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->count,
__print_symbolic(__entry->type, XFS_IO_TYPES),
__entry->startoff,
- (__int64_t)__entry->startblock,
+ (int64_t)__entry->startblock,
__entry->blockcount)
)
@@ -1488,25 +1491,6 @@ TRACE_EVENT(xfs_extent_busy_trim,
__entry->tlen)
);
-TRACE_EVENT(xfs_trans_commit_lsn,
- TP_PROTO(struct xfs_trans *trans),
- TP_ARGS(trans),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(struct xfs_trans *, tp)
- __field(xfs_lsn_t, lsn)
- ),
- TP_fast_assign(
- __entry->dev = trans->t_mountp->m_super->s_dev;
- __entry->tp = trans;
- __entry->lsn = trans->t_commit_lsn;
- ),
- TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->tp,
- __entry->lsn)
-);
-
TRACE_EVENT(xfs_agf,
TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
unsigned long caller_ip),
@@ -2055,7 +2039,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
TP_ARGS(log, buf_f),
TP_STRUCT__entry(
__field(dev_t, dev)
- __field(__int64_t, blkno)
+ __field(int64_t, blkno)
__field(unsigned short, len)
__field(unsigned short, flags)
__field(unsigned short, size)
@@ -2104,7 +2088,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
__field(int, fields)
__field(unsigned short, asize)
__field(unsigned short, dsize)
- __field(__int64_t, blkno)
+ __field(int64_t, blkno)
__field(int, len)
__field(int, boffset)
),
@@ -2190,7 +2174,7 @@ DECLARE_EVENT_CLASS(xfs_discard_class,
__entry->agbno = agbno;
__entry->len = len;
),
- TP_printk("dev %d:%d agno %u agbno %u len %u\n",
+ TP_printk("dev %d:%d agno %u agbno %u len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
@@ -2245,7 +2229,6 @@ DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
/* deferred ops */
struct xfs_defer_pending;
-struct xfs_defer_intake;
struct xfs_defer_ops;
DECLARE_EVENT_CLASS(xfs_defer_class,
@@ -2254,8 +2237,8 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(void *, dop)
- __field(bool, committed)
- __field(bool, low)
+ __field(char, committed)
+ __field(char, low)
),
TP_fast_assign(
__entry->dev = mp ? mp->m_super->s_dev : 0;
@@ -2263,7 +2246,7 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
__entry->committed = dop->dop_committed;
__entry->low = dop->dop_low;
),
- TP_printk("dev %d:%d ops %p committed %d low %d\n",
+ TP_printk("dev %d:%d ops %p committed %d low %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->dop,
__entry->committed,
@@ -2280,8 +2263,8 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(void *, dop)
- __field(bool, committed)
- __field(bool, low)
+ __field(char, committed)
+ __field(char, low)
__field(int, error)
),
TP_fast_assign(
@@ -2291,7 +2274,7 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
__entry->low = dop->dop_low;
__entry->error = error;
),
- TP_printk("dev %d:%d ops %p committed %d low %d err %d\n",
+ TP_printk("dev %d:%d ops %p committed %d low %d err %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->dop,
__entry->committed,
@@ -2310,7 +2293,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
__field(dev_t, dev)
__field(int, type)
__field(void *, intent)
- __field(bool, committed)
+ __field(char, committed)
__field(int, nr)
),
TP_fast_assign(
@@ -2320,7 +2303,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
__entry->committed = dfp->dfp_done != NULL;
__entry->nr = dfp->dfp_count;
),
- TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n",
+ TP_printk("dev %d:%d optype %d intent %p committed %d nr %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->intent,
@@ -2615,7 +2598,8 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class,
__entry->asked = r ? r->ar_asked : 0;
__entry->len = len;
),
- TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n",
+ TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u "
+ "resv %u ask %u len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->resv,
@@ -2668,7 +2652,7 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
__entry->agbno = agbno;
__entry->dir = dir;
),
- TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)\n",
+ TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
@@ -2701,7 +2685,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
),
- TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u\n",
+ TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->startblock,
@@ -2736,7 +2720,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
__entry->refcount = irec->rc_refcount;
__entry->agbno = agbno;
),
- TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u\n",
+ TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->startblock,
@@ -2777,7 +2761,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
__entry->i2_refcount = i2->rc_refcount;
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
- "agbno %u len %u refcount %u\n",
+ "agbno %u len %u refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->i1_startblock,
@@ -2823,7 +2807,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
__entry->agbno = agbno;
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
- "agbno %u len %u refcount %u @ agbno %u\n",
+ "agbno %u len %u refcount %u @ agbno %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->i1_startblock,
@@ -2876,7 +2860,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
),
TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
"agbno %u len %u refcount %u -- "
- "agbno %u len %u refcount %u\n",
+ "agbno %u len %u refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->i1_startblock,
@@ -3002,31 +2986,6 @@ DEFINE_EVENT(xfs_inode_error_class, name, \
unsigned long caller_ip), \
TP_ARGS(ip, error, caller_ip))
-/* reflink allocator */
-TRACE_EVENT(xfs_bmap_remap_alloc,
- TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno,
- xfs_extlen_t len),
- TP_ARGS(ip, fsbno, len),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsblock_t, fsbno)
- __field(xfs_extlen_t, len)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->fsbno = fsbno;
- __entry->len = len;
- ),
- TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->fsbno,
- __entry->len)
-);
-DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error);
-
/* reflink tracepoint classes */
/* two-file io tracepoint class */
@@ -3089,6 +3048,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
__field(xfs_fileoff_t, lblk)
__field(xfs_extlen_t, len)
__field(xfs_fsblock_t, pblk)
+ __field(int, state)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
@@ -3096,13 +3056,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
__entry->lblk = irec->br_startoff;
__entry->len = irec->br_blockcount;
__entry->pblk = irec->br_startblock;
+ __entry->state = irec->br_state;
),
- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu",
+ TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->lblk,
__entry->len,
- __entry->pblk)
+ __entry->pblk,
+ __entry->state)
);
#define DEFINE_INODE_IREC_EVENT(name) \
DEFINE_EVENT(xfs_inode_irec_class, name, \
@@ -3225,7 +3187,7 @@ TRACE_EVENT(xfs_ioctl_clone,
),
TP_printk("dev %d:%d "
"ino 0x%lx isize 0x%llx -> "
- "ino 0x%lx isize 0x%llx\n",
+ "ino 0x%lx isize 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->src_ino,
__entry->src_isize,
@@ -3242,11 +3204,11 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
+DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
-DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
+DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
@@ -3254,7 +3216,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
@@ -3266,6 +3227,88 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
+/* fsmap traces */
+DECLARE_EVENT_CLASS(xfs_fsmap_class,
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
+ struct xfs_rmap_irec *rmap),
+ TP_ARGS(mp, keydev, agno, rmap),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_fsblock_t, bno)
+ __field(xfs_filblks_t, len)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->keydev = new_decode_dev(keydev);
+ __entry->agno = agno;
+ __entry->bno = rmap->rm_startblock;
+ __entry->len = rmap->rm_blockcount;
+ __entry->owner = rmap->rm_owner;
+ __entry->offset = rmap->rm_offset;
+ __entry->flags = rmap->rm_flags;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->agno,
+ __entry->bno,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+)
+#define DEFINE_FSMAP_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
+ struct xfs_rmap_irec *rmap), \
+ TP_ARGS(mp, keydev, agno, rmap))
+DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
+DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
+DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
+
+DECLARE_EVENT_CLASS(xfs_getfsmap_class,
+ TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
+ TP_ARGS(mp, fsmap),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(xfs_daddr_t, block)
+ __field(xfs_daddr_t, len)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(uint64_t, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->keydev = new_decode_dev(fsmap->fmr_device);
+ __entry->block = fsmap->fmr_physical;
+ __entry->len = fsmap->fmr_length;
+ __entry->owner = fsmap->fmr_owner;
+ __entry->offset = fsmap->fmr_offset;
+ __entry->flags = fsmap->fmr_flags;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->block,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+)
+#define DEFINE_GETFSMAP_EVENT(name) \
+DEFINE_EVENT(xfs_getfsmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), \
+ TP_ARGS(mp, fsmap))
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 70f42ea86dfb..2011620008de 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -134,7 +134,7 @@ xfs_trans_reserve(
bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
/* Mark this thread as being in a transaction */
- current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
/*
* Attempt to reserve the needed disk blocks by decrementing
@@ -144,7 +144,7 @@ xfs_trans_reserve(
if (blocks > 0) {
error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
if (error != 0) {
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
return -ENOSPC;
}
tp->t_blk_res += blocks;
@@ -221,7 +221,7 @@ undo_blocks:
tp->t_blk_res = 0;
}
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
return error;
}
@@ -263,6 +263,28 @@ xfs_trans_alloc(
}
/*
+ * Create an empty transaction with no reservation. This is a defensive
+ * mechanism for routines that query metadata without actually modifying
+ * them -- if the metadata being queried is somehow cross-linked (think a
+ * btree block pointer that points higher in the tree), we risk deadlock.
+ * However, blocks grabbed as part of a transaction can be re-grabbed.
+ * The verifiers will notice the corrupt block and the operation will fail
+ * back to userspace without deadlocking.
+ *
+ * Note the zero-length reservation; this transaction MUST be cancelled
+ * without any dirty data.
+ */
+int
+xfs_trans_alloc_empty(
+ struct xfs_mount *mp,
+ struct xfs_trans **tpp)
+{
+ struct xfs_trans_res resv = {0};
+
+ return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
+}
+
+/*
* Record the indicated change to the given field for application
* to the file system's superblock when the transaction commits.
* For now, just store the change in the transaction structure.
@@ -914,7 +936,7 @@ __xfs_trans_commit(
xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free(tp);
/*
@@ -944,7 +966,7 @@ out_unreserve:
if (commit_lsn == -1 && !error)
error = -EIO;
}
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
xfs_trans_free(tp);
@@ -998,7 +1020,7 @@ xfs_trans_cancel(
xfs_log_done(mp, tp->t_ticket, NULL, false);
/* mark this thread as no longer being in a transaction */
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
xfs_trans_free(tp);
@@ -1012,17 +1034,14 @@ xfs_trans_cancel(
* chunk we've been working on and get a new transaction to continue.
*/
int
-__xfs_trans_roll(
+xfs_trans_roll(
struct xfs_trans **tpp,
- struct xfs_inode *dp,
- int *committed)
+ struct xfs_inode *dp)
{
struct xfs_trans *trans;
struct xfs_trans_res tres;
int error;
- *committed = 0;
-
/*
* Ensure that the inode is always logged.
*/
@@ -1048,7 +1067,6 @@ __xfs_trans_roll(
if (error)
return error;
- *committed = 1;
trans = *tpp;
/*
@@ -1071,12 +1089,3 @@ __xfs_trans_roll(
xfs_trans_ijoin(trans, dp, 0);
return 0;
}
-
-int
-xfs_trans_roll(
- struct xfs_trans **tpp,
- struct xfs_inode *dp)
-{
- int committed;
- return __xfs_trans_roll(tpp, dp, &committed);
-}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 61b7fbdd3ebd..6bdad6f58934 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -32,7 +32,6 @@ struct xfs_mount;
struct xfs_trans;
struct xfs_trans_res;
struct xfs_dquot_acct;
-struct xfs_busy_extent;
struct xfs_rud_log_item;
struct xfs_rui_log_item;
struct xfs_btree_cur;
@@ -106,10 +105,6 @@ typedef struct xfs_trans {
unsigned int t_rtx_res; /* # of rt extents resvd */
unsigned int t_rtx_res_used; /* # of resvd rt extents used */
struct xlog_ticket *t_ticket; /* log mgr ticket */
- xfs_lsn_t t_lsn; /* log seq num of start of
- * transaction. */
- xfs_lsn_t t_commit_lsn; /* log seq num of end of
- * transaction. */
struct xfs_mount *t_mountp; /* ptr to fs mount struct */
struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
unsigned int t_flags; /* misc flags */
@@ -159,6 +154,8 @@ typedef struct xfs_trans {
int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
uint blocks, uint rtextents, uint flags,
struct xfs_trans **tpp);
+int xfs_trans_alloc_empty(struct xfs_mount *mp,
+ struct xfs_trans **tpp);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
@@ -227,7 +224,6 @@ int xfs_trans_free_extent(struct xfs_trans *,
struct xfs_efd_log_item *, xfs_fsblock_t,
xfs_extlen_t, struct xfs_owner_info *);
int xfs_trans_commit(struct xfs_trans *);
-int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
void xfs_trans_cancel(xfs_trans_t *);
int xfs_trans_ail_init(struct xfs_mount *);
@@ -249,7 +245,7 @@ struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
struct xfs_rui_log_item *ruip);
int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
- __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+ uint64_t owner, int whichfork, xfs_fileoff_t startoff,
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
xfs_exntst_t state, struct xfs_btree_cur **pcur);
@@ -275,6 +271,6 @@ int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops,
enum xfs_bmap_intent_type type, struct xfs_inode *ip,
int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
- xfs_filblks_t blockcount, xfs_exntst_t state);
+ xfs_filblks_t *blockcount, xfs_exntst_t state);
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index d6c9c3e9e02b..9056c0f34a3c 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -684,8 +684,23 @@ xfs_trans_ail_update_bulk(
}
}
-/*
- * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
+bool
+xfs_ail_delete_one(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_log_item *mlip = xfs_ail_min(ailp);
+
+ trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
+ xfs_ail_delete(ailp, lip);
+ lip->li_flags &= ~XFS_LI_IN_AIL;
+ lip->li_lsn = 0;
+
+ return mlip == lip;
+}
+
+/**
+ * Remove a log items from the AIL
*
* @xfs_trans_ail_delete_bulk takes an array of log items that all need to
* removed from the AIL. The caller is already holding the AIL lock, and done
@@ -706,52 +721,36 @@ xfs_trans_ail_update_bulk(
* before returning.
*/
void
-xfs_trans_ail_delete_bulk(
+xfs_trans_ail_delete(
struct xfs_ail *ailp,
- struct xfs_log_item **log_items,
- int nr_items,
+ struct xfs_log_item *lip,
int shutdown_type) __releases(ailp->xa_lock)
{
- xfs_log_item_t *mlip;
- int mlip_changed = 0;
- int i;
+ struct xfs_mount *mp = ailp->xa_mount;
+ bool mlip_changed;
- mlip = xfs_ail_min(ailp);
-
- for (i = 0; i < nr_items; i++) {
- struct xfs_log_item *lip = log_items[i];
- if (!(lip->li_flags & XFS_LI_IN_AIL)) {
- struct xfs_mount *mp = ailp->xa_mount;
-
- spin_unlock(&ailp->xa_lock);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
- "%s: attempting to delete a log item that is not in the AIL",
- __func__);
- xfs_force_shutdown(mp, shutdown_type);
- }
- return;
+ if (!(lip->li_flags & XFS_LI_IN_AIL)) {
+ spin_unlock(&ailp->xa_lock);
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
+ xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
+ "%s: attempting to delete a log item that is not in the AIL",
+ __func__);
+ xfs_force_shutdown(mp, shutdown_type);
}
-
- trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
- xfs_ail_delete(ailp, lip);
- lip->li_flags &= ~XFS_LI_IN_AIL;
- lip->li_lsn = 0;
- if (mlip == lip)
- mlip_changed = 1;
+ return;
}
+ mlip_changed = xfs_ail_delete_one(ailp, lip);
if (mlip_changed) {
- if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
- xlog_assign_tail_lsn_locked(ailp->xa_mount);
+ if (!XFS_FORCED_SHUTDOWN(mp))
+ xlog_assign_tail_lsn_locked(mp);
if (list_empty(&ailp->xa_ail))
wake_up_all(&ailp->xa_empty);
- spin_unlock(&ailp->xa_lock);
+ }
+ spin_unlock(&ailp->xa_lock);
+ if (mlip_changed)
xfs_log_space_wake(ailp->xa_mount);
- } else {
- spin_unlock(&ailp->xa_lock);
- }
}
int
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
index 6408e7d7c08c..14543d93cd4b 100644
--- a/fs/xfs/xfs_trans_bmap.c
+++ b/fs/xfs/xfs_trans_bmap.c
@@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update(
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
- xfs_filblks_t blockcount,
+ xfs_filblks_t *blockcount,
xfs_exntst_t state)
{
int error;
@@ -196,16 +196,23 @@ xfs_bmap_update_finish_item(
void **state)
{
struct xfs_bmap_intent *bmap;
+ xfs_filblks_t count;
int error;
bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+ count = bmap->bi_bmap.br_blockcount;
error = xfs_trans_log_finish_bmap_update(tp, done_item, dop,
bmap->bi_type,
bmap->bi_owner, bmap->bi_whichfork,
bmap->bi_bmap.br_startoff,
bmap->bi_bmap.br_startblock,
- bmap->bi_bmap.br_blockcount,
+ &count,
bmap->bi_bmap.br_state);
+ if (!error && count > 0) {
+ ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+ bmap->bi_bmap.br_blockcount = count;
+ return -EAGAIN;
+ }
kmem_free(bmap);
return error;
}
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 8ee29ca132dc..86987d823d76 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -356,6 +356,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
+ int freed;
/*
* Default to a normal brelse() call if the tp is NULL.
@@ -419,16 +420,22 @@ xfs_trans_brelse(xfs_trans_t *tp,
/*
* Drop our reference to the buf log item.
*/
- atomic_dec(&bip->bli_refcount);
+ freed = atomic_dec_and_test(&bip->bli_refcount);
/*
- * If the buf item is not tracking data in the log, then
- * we must free it before releasing the buffer back to the
- * free pool. Before releasing the buffer to the free pool,
- * clear the transaction pointer in b_fsprivate2 to dissolve
- * its relation to this transaction.
+ * If the buf item is not tracking data in the log, then we must free it
+ * before releasing the buffer back to the free pool.
+ *
+ * If the fs has shutdown and we dropped the last reference, it may fall
+ * on us to release a (possibly dirty) bli if it never made it to the
+ * AIL (e.g., the aborted unpin already happened and didn't release it
+ * due to our reference). Since we're already shutdown and need xa_lock,
+ * just force remove from the AIL and release the bli here.
*/
- if (!xfs_buf_item_dirty(bip)) {
+ if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
+ xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_buf_item_relse(bp);
+ } else if (!xfs_buf_item_dirty(bip)) {
/***
ASSERT(bp->b_pincount == 0);
***/
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 49931b72da8a..d91706c56c63 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -106,18 +106,9 @@ xfs_trans_ail_update(
xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
-void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
- struct xfs_log_item **log_items, int nr_items,
- int shutdown_type)
- __releases(ailp->xa_lock);
-static inline void
-xfs_trans_ail_delete(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip,
- int shutdown_type) __releases(ailp->xa_lock)
-{
- xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type);
-}
+bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
+ int shutdown_type) __releases(ailp->xa_lock);
static inline void
xfs_trans_ail_remove(
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
index 9ead064b5e90..9b577beb43d7 100644
--- a/fs/xfs/xfs_trans_rmap.c
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -96,7 +96,7 @@ xfs_trans_log_finish_rmap_update(
struct xfs_trans *tp,
struct xfs_rud_log_item *rudp,
enum xfs_rmap_intent_type type,
- __uint64_t owner,
+ uint64_t owner,
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,