about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c          |  2
-rw-r--r--  fs/xfs/libxfs/xfs_btree.h          |  3
-rw-r--r--  fs/xfs/libxfs/xfs_btree_staging.c  | 78
-rw-r--r--  fs/xfs/libxfs/xfs_btree_staging.h  | 25
-rw-r--r--  fs/xfs/scrub/newbt.c               | 12
-rw-r--r--  fs/xfs/xfs_buf.c                   | 44
-rw-r--r--  fs/xfs/xfs_buf.h                   |  1
-rw-r--r--  fs/xfs/xfs_globals.c               | 12
-rw-r--r--  fs/xfs/xfs_sysctl.h                |  2
-rw-r--r--  fs/xfs/xfs_sysfs.c                 | 54
10 files changed, 198 insertions, 35 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 6a6503ab0cd7..c100e92140be 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1330,7 +1330,7 @@ xfs_btree_get_buf_block(
* Read in the buffer at the given ptr and return the buffer and
* the block pointer within the buffer.
*/
-STATIC int
+int
xfs_btree_read_buf_block(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr,
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 4d68a58be160..e0875cec4939 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -700,6 +700,9 @@ void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur,
int xfs_btree_get_buf_block(struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr, struct xfs_btree_block **block,
struct xfs_buf **bpp);
+int xfs_btree_read_buf_block(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr, int flags,
+ struct xfs_btree_block **block, struct xfs_buf **bpp);
void xfs_btree_set_sibling(struct xfs_btree_cur *cur,
struct xfs_btree_block *block, const union xfs_btree_ptr *ptr,
int lr);
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index dd75e208b543..0c978a31e284 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -333,20 +333,41 @@ xfs_btree_commit_ifakeroot(
/*
* Put a btree block that we're loading onto the ordered list and release it.
* The btree blocks will be written to disk when bulk loading is finished.
+ * If we reach the dirty buffer threshold, flush them to disk before
+ * continuing.
*/
-static void
+static int
xfs_btree_bload_drop_buf(
- struct list_head *buffers_list,
- struct xfs_buf **bpp)
+ struct xfs_btree_bload *bbl,
+ struct list_head *buffers_list,
+ struct xfs_buf **bpp)
{
- if (*bpp == NULL)
- return;
+ struct xfs_buf *bp = *bpp;
+ int error;
- if (!xfs_buf_delwri_queue(*bpp, buffers_list))
- ASSERT(0);
+ if (!bp)
+ return 0;
+
+ /*
+ * Mark this buffer XBF_DONE (i.e. uptodate) so that a subsequent
+ * xfs_buf_read will not pointlessly reread the contents from the disk.
+ */
+ bp->b_flags |= XBF_DONE;
- xfs_buf_relse(*bpp);
+ xfs_buf_delwri_queue_here(bp, buffers_list);
+ xfs_buf_relse(bp);
*bpp = NULL;
+ bbl->nr_dirty++;
+
+ if (!bbl->max_dirty || bbl->nr_dirty < bbl->max_dirty)
+ return 0;
+
+ error = xfs_buf_delwri_submit(buffers_list);
+ if (error)
+ return error;
+
+ bbl->nr_dirty = 0;
+ return 0;
}
/*
@@ -418,7 +439,10 @@ xfs_btree_bload_prep_block(
*/
if (*blockp)
xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB);
- xfs_btree_bload_drop_buf(buffers_list, bpp);
+
+ ret = xfs_btree_bload_drop_buf(bbl, buffers_list, bpp);
+ if (ret)
+ return ret;
/* Initialize the new btree block. */
xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block);
@@ -436,22 +460,19 @@ STATIC int
xfs_btree_bload_leaf(
struct xfs_btree_cur *cur,
unsigned int recs_this_block,
- xfs_btree_bload_get_record_fn get_record,
+ xfs_btree_bload_get_records_fn get_records,
struct xfs_btree_block *block,
void *priv)
{
- unsigned int j;
+ unsigned int j = 1;
int ret;
/* Fill the leaf block with records. */
- for (j = 1; j <= recs_this_block; j++) {
- union xfs_btree_rec *block_rec;
-
- ret = get_record(cur, priv);
- if (ret)
+ while (j <= recs_this_block) {
+ ret = get_records(cur, j, block, recs_this_block - j + 1, priv);
+ if (ret < 0)
return ret;
- block_rec = xfs_btree_rec_addr(cur, j, block);
- cur->bc_ops->init_rec_from_cur(cur, block_rec);
+ j += ret;
}
return 0;
@@ -485,7 +506,12 @@ xfs_btree_bload_node(
ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr));
- ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block,
+ /*
+ * Read the lower-level block in case the buffer for it has
+ * been reclaimed. LRU refs will be set on the block, which is
+ * desirable if the new btree commits.
+ */
+ ret = xfs_btree_read_buf_block(cur, child_ptr, 0, &child_block,
&child_bp);
if (ret)
return ret;
@@ -764,6 +790,7 @@ xfs_btree_bload(
cur->bc_nlevels = bbl->btree_height;
xfs_btree_set_ptr_null(cur, &child_ptr);
xfs_btree_set_ptr_null(cur, &ptr);
+ bbl->nr_dirty = 0;
xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
&avg_per_block, &blocks, &blocks_with_extra);
@@ -789,7 +816,7 @@ xfs_btree_bload(
trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr,
nr_this_block);
- ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record,
+ ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_records,
block, priv);
if (ret)
goto out;
@@ -802,7 +829,10 @@ xfs_btree_bload(
xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1);
}
total_blocks += blocks;
- xfs_btree_bload_drop_buf(&buffers_list, &bp);
+
+ ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
+ if (ret)
+ goto out;
/* Populate the internal btree nodes. */
for (level = 1; level < cur->bc_nlevels; level++) {
@@ -844,7 +874,11 @@ xfs_btree_bload(
xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1);
}
total_blocks += blocks;
- xfs_btree_bload_drop_buf(&buffers_list, &bp);
+
+ ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
+ if (ret)
+ goto out;
+
xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1);
}
diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h
index 5f638f711246..f0a5007284ef 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.h
+++ b/fs/xfs/libxfs/xfs_btree_staging.h
@@ -47,7 +47,9 @@ void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
int whichfork, const struct xfs_btree_ops *ops);
/* Bulk loading of staged btrees. */
-typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv);
+typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur,
+ unsigned int idx, struct xfs_btree_block *block,
+ unsigned int nr_wanted, void *priv);
typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr, void *priv);
typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
@@ -55,11 +57,14 @@ typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
struct xfs_btree_bload {
/*
- * This function will be called nr_records times to load records into
- * the btree. The function does this by setting the cursor's bc_rec
- * field in in-core format. Records must be returned in sort order.
+ * This function will be called to load @nr_wanted records into the
+ * btree. The implementation does this by setting the cursor's bc_rec
+ * field in in-core format and using init_rec_from_cur to set the
+ * records in the btree block. Records must be returned in sort order.
+ * The function must return the number of records loaded or the usual
+ * negative errno.
*/
- xfs_btree_bload_get_record_fn get_record;
+ xfs_btree_bload_get_records_fn get_records;
/*
* This function will be called nr_blocks times to obtain a pointer
@@ -107,6 +112,16 @@ struct xfs_btree_bload {
* height of the new btree.
*/
unsigned int btree_height;
+
+ /*
+ * Flush the new btree block buffer list to disk after this many blocks
+ * have been formatted. Zero prohibits writing any buffers until all
+ * blocks have been formatted.
+ */
+ uint16_t max_dirty;
+
+ /* Number of dirty buffers. */
+ uint16_t nr_dirty;
};
int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur,
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 992cf34a13e7..81919eeabcdb 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -32,6 +32,7 @@
* btree bulk loading code calculates for us. However, there are some
* exceptions to this rule:
*
+ * (0) If someone has changed one of the debug knobs.
* (1) If this is a per-AG btree and the AG has less than 10% space free.
* (2) If this is an inode btree and the FS has less than 10% space free.
@@ -47,9 +48,13 @@ xrep_newbt_estimate_slack(
uint64_t free;
uint64_t sz;
- /* Let the btree code compute the default slack values. */
- bload->leaf_slack = -1;
- bload->node_slack = -1;
+ /*
+ * The xfs_globals values are set to -1 (i.e. take the bload defaults)
+ * unless someone has set them otherwise, so we just pull the values
+ * here.
+ */
+ bload->leaf_slack = xfs_globals.bload_leaf_slack;
+ bload->node_slack = xfs_globals.bload_node_slack;
if (sc->ops->type == ST_PERAG) {
free = sc->sa.pag->pagf_freeblks;
@@ -89,6 +94,7 @@ xrep_newbt_init_ag(
xnr->alloc_hint = alloc_hint;
xnr->resv = resv;
INIT_LIST_HEAD(&xnr->resv_list);
+ xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
xrep_newbt_estimate_slack(xnr);
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 545c7991b9b5..ec4bd7a24d88 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2049,6 +2049,14 @@ error_free:
return NULL;
}
+static inline void
+xfs_buf_list_del(
+ struct xfs_buf *bp)
+{
+ list_del_init(&bp->b_list);
+ wake_up_var(&bp->b_list);
+}
+
/*
* Cancel a delayed write list.
*
@@ -2066,7 +2074,7 @@ xfs_buf_delwri_cancel(
xfs_buf_lock(bp);
bp->b_flags &= ~_XBF_DELWRI_Q;
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
xfs_buf_relse(bp);
}
}
@@ -2120,6 +2128,34 @@ xfs_buf_delwri_queue(
}
/*
+ * Queue a buffer to this delwri list as part of a data integrity operation.
+ * If the buffer is on any other delwri list, we'll wait for that to clear
+ * so that the caller can submit the buffer for IO and wait for the result.
+ * Callers must ensure the buffer is not already on the list.
+ */
+void
+xfs_buf_delwri_queue_here(
+ struct xfs_buf *bp,
+ struct list_head *buffer_list)
+{
+ /*
+ * We need this buffer to end up on the /caller's/ delwri list, not any
+ * old list. The buffer can still be on another list if it was marked
+ * stale (which clears DELWRI_Q) after the AIL queued it to its list but
+ * before the AIL had a chance to submit that list.
+ */
+ while (!list_empty(&bp->b_list)) {
+ xfs_buf_unlock(bp);
+ wait_var_event(&bp->b_list, list_empty(&bp->b_list));
+ xfs_buf_lock(bp);
+ }
+
+ ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
+
+ xfs_buf_delwri_queue(bp, buffer_list);
+}
+
+/*
* Compare function is more complex than it needs to be because
* the return value is only 32 bits and we are doing comparisons
* on 64 bit values
@@ -2181,7 +2217,7 @@ xfs_buf_delwri_submit_buffers(
* reference and remove it from the list here.
*/
if (!(bp->b_flags & _XBF_DELWRI_Q)) {
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
xfs_buf_relse(bp);
continue;
}
@@ -2201,7 +2237,7 @@ xfs_buf_delwri_submit_buffers(
list_move_tail(&bp->b_list, wait_list);
} else {
bp->b_flags |= XBF_ASYNC;
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
}
__xfs_buf_submit(bp, false);
}
@@ -2255,7 +2291,7 @@ xfs_buf_delwri_submit(
while (!list_empty(&wait_list)) {
bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
/*
* Wait on the locked buffer, check for errors and unlock and
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c86e16419656..b470de08a46c 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -319,6 +319,7 @@ extern void xfs_buf_stale(struct xfs_buf *bp);
/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_cancel(struct list_head *);
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
+void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 9edc1f2bc939..f18fec0adf66 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -44,4 +44,16 @@ struct xfs_globals xfs_globals = {
.pwork_threads = -1, /* automatic thread detection */
.larp = false, /* log attribute replay */
#endif
+
+ /*
+ * Leave this many record slots empty when bulk loading btrees. By
+ * default we load new btree leaf blocks 75% full.
+ */
+ .bload_leaf_slack = -1,
+
+ /*
+ * Leave this many key/ptr slots empty when bulk loading btrees. By
+ * default we load new btree node blocks 75% full.
+ */
+ .bload_node_slack = -1,
};
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index f78ad6b10ea5..276696a07040 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -85,6 +85,8 @@ struct xfs_globals {
int pwork_threads; /* parallel workqueue threads */
bool larp; /* log attribute replay */
#endif
+ int bload_leaf_slack; /* btree bulk load leaf slack */
+ int bload_node_slack; /* btree bulk load node slack */
int log_recovery_delay; /* log recovery delay (secs) */
int mount_delay; /* mount setup delay (secs) */
bool bug_on_assert; /* BUG() the kernel on assert failure */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 871f16a4a5d8..17485666b672 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -262,6 +262,58 @@ larp_show(
XFS_SYSFS_ATTR_RW(larp);
#endif /* DEBUG */
+STATIC ssize_t
+bload_leaf_slack_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ xfs_globals.bload_leaf_slack = val;
+ return count;
+}
+
+STATIC ssize_t
+bload_leaf_slack_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_leaf_slack);
+}
+XFS_SYSFS_ATTR_RW(bload_leaf_slack);
+
+STATIC ssize_t
+bload_node_slack_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ xfs_globals.bload_node_slack = val;
+ return count;
+}
+
+STATIC ssize_t
+bload_node_slack_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_node_slack);
+}
+XFS_SYSFS_ATTR_RW(bload_node_slack);
+
static struct attribute *xfs_dbg_attrs[] = {
ATTR_LIST(bug_on_assert),
ATTR_LIST(log_recovery_delay),
@@ -271,6 +323,8 @@ static struct attribute *xfs_dbg_attrs[] = {
ATTR_LIST(pwork_threads),
ATTR_LIST(larp),
#endif
+ ATTR_LIST(bload_leaf_slack),
+ ATTR_LIST(bload_node_slack),
NULL,
};
ATTRIBUTE_GROUPS(xfs_dbg);