diff options
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.c | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.h | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree_staging.c | 78 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree_staging.h | 25 | ||||
-rw-r--r-- | fs/xfs/scrub/newbt.c | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 44 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_globals.c | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_sysctl.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.c | 54 |
10 files changed, 198 insertions, 35 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 6a6503ab0cd7..c100e92140be 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1330,7 +1330,7 @@ xfs_btree_get_buf_block( * Read in the buffer at the given ptr and return the buffer and * the block pointer within the buffer. */ -STATIC int +int xfs_btree_read_buf_block( struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr, diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 4d68a58be160..e0875cec4939 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -700,6 +700,9 @@ void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur, int xfs_btree_get_buf_block(struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr, struct xfs_btree_block **block, struct xfs_buf **bpp); +int xfs_btree_read_buf_block(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, int flags, + struct xfs_btree_block **block, struct xfs_buf **bpp); void xfs_btree_set_sibling(struct xfs_btree_cur *cur, struct xfs_btree_block *block, const union xfs_btree_ptr *ptr, int lr); diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index dd75e208b543..0c978a31e284 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -333,20 +333,41 @@ xfs_btree_commit_ifakeroot( /* * Put a btree block that we're loading onto the ordered list and release it. * The btree blocks will be written to disk when bulk loading is finished. + * If we reach the dirty buffer threshold, flush them to disk before + * continuing. */ -static void +static int xfs_btree_bload_drop_buf( - struct list_head *buffers_list, - struct xfs_buf **bpp) + struct xfs_btree_bload *bbl, + struct list_head *buffers_list, + struct xfs_buf **bpp) { - if (*bpp == NULL) - return; + struct xfs_buf *bp = *bpp; + int error; - if (!xfs_buf_delwri_queue(*bpp, buffers_list)) - ASSERT(0); + if (!bp) + return 0; + + /* + * Mark this buffer XBF_DONE (i.e. uptodate) so that a subsequent + * xfs_buf_read will not pointlessly reread the contents from the disk. + */ + bp->b_flags |= XBF_DONE; - xfs_buf_relse(*bpp); + xfs_buf_delwri_queue_here(bp, buffers_list); + xfs_buf_relse(bp); *bpp = NULL; + bbl->nr_dirty++; + + if (!bbl->max_dirty || bbl->nr_dirty < bbl->max_dirty) + return 0; + + error = xfs_buf_delwri_submit(buffers_list); + if (error) + return error; + + bbl->nr_dirty = 0; + return 0; } /* @@ -418,7 +439,10 @@ xfs_btree_bload_prep_block( */ if (*blockp) xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB); - xfs_btree_bload_drop_buf(buffers_list, bpp); + + ret = xfs_btree_bload_drop_buf(bbl, buffers_list, bpp); + if (ret) + return ret; /* Initialize the new btree block. */ xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block); @@ -436,22 +460,19 @@ STATIC int xfs_btree_bload_leaf( struct xfs_btree_cur *cur, unsigned int recs_this_block, - xfs_btree_bload_get_record_fn get_record, + xfs_btree_bload_get_records_fn get_records, struct xfs_btree_block *block, void *priv) { - unsigned int j; + unsigned int j = 1; int ret; /* Fill the leaf block with records. */ - for (j = 1; j <= recs_this_block; j++) { - union xfs_btree_rec *block_rec; - - ret = get_record(cur, priv); - if (ret) + while (j <= recs_this_block) { + ret = get_records(cur, j, block, recs_this_block - j + 1, priv); + if (ret < 0) return ret; - block_rec = xfs_btree_rec_addr(cur, j, block); - cur->bc_ops->init_rec_from_cur(cur, block_rec); + j += ret; } return 0; @@ -485,7 +506,12 @@ xfs_btree_bload_node( ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr)); - ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block, + /* + * Read the lower-level block in case the buffer for it has + * been reclaimed. LRU refs will be set on the block, which is + * desirable if the new btree commits. + */ + ret = xfs_btree_read_buf_block(cur, child_ptr, 0, &child_block, &child_bp); if (ret) return ret; @@ -764,6 +790,7 @@ xfs_btree_bload( cur->bc_nlevels = bbl->btree_height; xfs_btree_set_ptr_null(cur, &child_ptr); xfs_btree_set_ptr_null(cur, &ptr); + bbl->nr_dirty = 0; xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, &avg_per_block, &blocks, &blocks_with_extra); @@ -789,7 +816,7 @@ xfs_btree_bload( trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr, nr_this_block); - ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record, + ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_records, block, priv); if (ret) goto out; @@ -802,7 +829,10 @@ xfs_btree_bload( xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1); } total_blocks += blocks; - xfs_btree_bload_drop_buf(&buffers_list, &bp); + + ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp); + if (ret) + goto out; /* Populate the internal btree nodes. */ for (level = 1; level < cur->bc_nlevels; level++) { @@ -844,7 +874,11 @@ xfs_btree_bload( xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1); } total_blocks += blocks; - xfs_btree_bload_drop_buf(&buffers_list, &bp); + + ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp); + if (ret) + goto out; + xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1); } diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h index 5f638f711246..f0a5007284ef 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.h +++ b/fs/xfs/libxfs/xfs_btree_staging.h @@ -47,7 +47,9 @@ void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, int whichfork, const struct xfs_btree_ops *ops); /* Bulk loading of staged btrees. */ -typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv); +typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur, + unsigned int idx, struct xfs_btree_block *block, + unsigned int nr_wanted, void *priv); typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, void *priv); typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur, @@ -55,11 +57,14 @@ typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur, struct xfs_btree_bload { /* - * This function will be called nr_records times to load records into - * the btree. The function does this by setting the cursor's bc_rec - * field in in-core format. Records must be returned in sort order. + * This function will be called to load @nr_wanted records into the + * btree. The implementation does this by setting the cursor's bc_rec + * field in in-core format and using init_rec_from_cur to set the + * records in the btree block. Records must be returned in sort order. + * The function must return the number of records loaded or the usual + * negative errno. */ - xfs_btree_bload_get_record_fn get_record; + xfs_btree_bload_get_records_fn get_records; /* * This function will be called nr_blocks times to obtain a pointer @@ -107,6 +112,16 @@ struct xfs_btree_bload { * height of the new btree. */ unsigned int btree_height; + + /* + * Flush the new btree block buffer list to disk after this many blocks + * have been formatted. Zero prohibits writing any buffers until all + * blocks have been formatted. + */ + uint16_t max_dirty; + + /* Number of dirty buffers. */ + uint16_t nr_dirty; }; int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur, diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c index 992cf34a13e7..81919eeabcdb 100644 --- a/fs/xfs/scrub/newbt.c +++ b/fs/xfs/scrub/newbt.c @@ -32,6 +32,7 @@ * btree bulk loading code calculates for us. However, there are some * exceptions to this rule: * + * (0) If someone turned one of the debug knobs. * (1) If this is a per-AG btree and the AG has less than 10% space free. * (2) If this is an inode btree and the FS has less than 10% space free. @@ -47,9 +48,13 @@ xrep_newbt_estimate_slack( uint64_t free; uint64_t sz; - /* Let the btree code compute the default slack values. */ - bload->leaf_slack = -1; - bload->node_slack = -1; + /* + * The xfs_globals values are set to -1 (i.e. take the bload defaults) + * unless someone has set them otherwise, so we just pull the values + * here. + */ + bload->leaf_slack = xfs_globals.bload_leaf_slack; + bload->node_slack = xfs_globals.bload_node_slack; if (sc->ops->type == ST_PERAG) { free = sc->sa.pag->pagf_freeblks; @@ -89,6 +94,7 @@ xrep_newbt_init_ag( xnr->alloc_hint = alloc_hint; xnr->resv = resv; INIT_LIST_HEAD(&xnr->resv_list); + xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */ xrep_newbt_estimate_slack(xnr); } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 545c7991b9b5..ec4bd7a24d88 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2049,6 +2049,14 @@ error_free: return NULL; } +static inline void +xfs_buf_list_del( + struct xfs_buf *bp) +{ + list_del_init(&bp->b_list); + wake_up_var(&bp->b_list); +} + /* * Cancel a delayed write list. * @@ -2066,7 +2074,7 @@ xfs_buf_delwri_cancel( xfs_buf_lock(bp); bp->b_flags &= ~_XBF_DELWRI_Q; - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); xfs_buf_relse(bp); } } @@ -2120,6 +2128,34 @@ xfs_buf_delwri_queue( } /* + * Queue a buffer to this delwri list as part of a data integrity operation. + * If the buffer is on any other delwri list, we'll wait for that to clear + * so that the caller can submit the buffer for IO and wait for the result. + * Callers must ensure the buffer is not already on the list. + */ +void +xfs_buf_delwri_queue_here( + struct xfs_buf *bp, + struct list_head *buffer_list) +{ + /* + * We need this buffer to end up on the /caller's/ delwri list, not any + * old list. This can happen if the buffer is marked stale (which + * clears DELWRI_Q) after the AIL queues the buffer to its list but + * before the AIL has a chance to submit the list. + */ + while (!list_empty(&bp->b_list)) { + xfs_buf_unlock(bp); + wait_var_event(&bp->b_list, list_empty(&bp->b_list)); + xfs_buf_lock(bp); + } + + ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); + + xfs_buf_delwri_queue(bp, buffer_list); +} + +/* * Compare function is more complex than it needs to be because * the return value is only 32 bits and we are doing comparisons * on 64 bit values @@ -2181,7 +2217,7 @@ xfs_buf_delwri_submit_buffers( * reference and remove it from the list here. */ if (!(bp->b_flags & _XBF_DELWRI_Q)) { - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); xfs_buf_relse(bp); continue; } @@ -2201,7 +2237,7 @@ xfs_buf_delwri_submit_buffers( list_move_tail(&bp->b_list, wait_list); } else { bp->b_flags |= XBF_ASYNC; - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); } __xfs_buf_submit(bp, false); } @@ -2255,7 +2291,7 @@ xfs_buf_delwri_submit( while (!list_empty(&wait_list)) { bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); /* * Wait on the locked buffer, check for errors and unlock and diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c86e16419656..b470de08a46c 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -319,6 +319,7 @@ extern void xfs_buf_stale(struct xfs_buf *bp); /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); +void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 9edc1f2bc939..f18fec0adf66 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -44,4 +44,16 @@ struct xfs_globals xfs_globals = { .pwork_threads = -1, /* automatic thread detection */ .larp = false, /* log attribute replay */ #endif + + /* + * Leave this many record slots empty when bulk loading btrees. By + * default we load new btree leaf blocks 75% full. + */ + .bload_leaf_slack = -1, + + /* + * Leave this many key/ptr slots empty when bulk loading btrees. By + * default we load new btree node blocks 75% full. + */ + .bload_node_slack = -1, }; diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index f78ad6b10ea5..276696a07040 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -85,6 +85,8 @@ struct xfs_globals { int pwork_threads; /* parallel workqueue threads */ bool larp; /* log attribute replay */ #endif + int bload_leaf_slack; /* btree bulk load leaf slack */ + int bload_node_slack; /* btree bulk load node slack */ int log_recovery_delay; /* log recovery delay (secs) */ int mount_delay; /* mount setup delay (secs) */ bool bug_on_assert; /* BUG() the kernel on assert failure */ diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 871f16a4a5d8..17485666b672 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -262,6 +262,58 @@ larp_show( XFS_SYSFS_ATTR_RW(larp); #endif /* DEBUG */ +STATIC ssize_t +bload_leaf_slack_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + xfs_globals.bload_leaf_slack = val; + return count; +} + +STATIC ssize_t +bload_leaf_slack_show( + struct kobject *kobject, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_leaf_slack); +} +XFS_SYSFS_ATTR_RW(bload_leaf_slack); + +STATIC ssize_t +bload_node_slack_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + xfs_globals.bload_node_slack = val; + return count; +} + +STATIC ssize_t +bload_node_slack_show( + struct kobject *kobject, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_node_slack); +} +XFS_SYSFS_ATTR_RW(bload_node_slack); + static struct attribute *xfs_dbg_attrs[] = { ATTR_LIST(bug_on_assert), ATTR_LIST(log_recovery_delay), @@ -271,6 +323,8 @@ static struct attribute *xfs_dbg_attrs[] = { ATTR_LIST(pwork_threads), ATTR_LIST(larp), #endif + ATTR_LIST(bload_leaf_slack), + ATTR_LIST(bload_node_slack), NULL, }; ATTRIBUTE_GROUPS(xfs_dbg); |