diff options
Author:    Kent Overstreet <kent.overstreet@gmail.com>  2019-07-22 13:37:02 -0400
Committer: Kent Overstreet <kent.overstreet@linux.dev>  2023-10-22 17:08:26 -0400
Commit:    63095894686cb4e16ad6a8329e95681cee63d615 (patch)
Tree:      46e17990156c8db36e7d5e02ff67f67d3609c098
Parent:    c0fc30dad5820b9e7d27355ec8a507f61d27a299 (diff)
bcachefs: Improved bch2_fcollapse()
Move extents instead of copying them - this way, we can iterate over
only live extents, not the entire keyspace. Also, this means we can
mostly skip running triggers.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
 fs/bcachefs/buckets.c |   1 +
 fs/bcachefs/fs-io.c   | 126 +++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 88 insertions(+), 39 deletions(-)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index b6340a2f6deb..637a9e909f82 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1486,6 +1486,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, bch2_fs_inconsistent_on(overflow, c, "bucket sector count overflow: %u + %lli > U16_MAX", old, sectors); + BUG_ON(overflow); a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX); ret = PTR_ERR_OR_ZERO(a); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 0dfe822cecbf..4a016c19dcbd 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2602,9 +2602,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; struct btree_trans trans; - struct btree_iter *src, *dst; - BKEY_PADDED(k) copy; - struct bkey_s_c k; + struct btree_iter *src, *dst, *del = NULL; loff_t new_size; int ret; @@ -2636,74 +2634,124 @@ static long bch2_fcollapse(struct bch_inode_info *inode, if (ret) goto err; + ret = __bch2_fpunch(c, inode, offset >> 9, + (offset + len) >> 9); + if (ret) + goto err; + dst = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + BTREE_ITER_INTENT); BUG_ON(IS_ERR_OR_NULL(dst)); src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, - POS_MIN, BTREE_ITER_SLOTS); + POS(inode->v.i_ino, (offset + len) >> 9), + BTREE_ITER_INTENT); BUG_ON(IS_ERR_OR_NULL(src)); - while (bkey_cmp(dst->pos, - POS(inode->v.i_ino, - round_up(new_size, block_bytes(c)) >> 9)) < 0) { - struct disk_reservation disk_res; + while (1) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(c, 0); + BKEY_PADDED(k) copy; + struct bkey_i delete; + struct bkey_s_c k; + struct bpos next_pos; + unsigned commit_flags = BTREE_INSERT_NOFAIL| + BTREE_INSERT_ATOMIC| + BTREE_INSERT_USE_RESERVE; - ret = bch2_btree_iter_traverse(dst); - if (ret) + k = bch2_btree_iter_peek(src); + if ((ret = 
bkey_err(k))) goto bkey_err; - bch2_btree_iter_set_pos(src, - POS(dst->pos.inode, dst->pos.offset + (len >> 9))); + if (!k.k || k.k->p.inode != inode->v.i_ino) + break; - k = bch2_btree_iter_peek_slot(src); - if ((ret = bkey_err(k))) - goto bkey_err; + BUG_ON(src->pos.offset != bkey_start_offset(k.k)); - bkey_reassemble(©.k, k); + bch2_btree_iter_set_pos(dst, + POS(inode->v.i_ino, src->pos.offset - (len >> 9))); - bch2_cut_front(src->pos, ©.k); - copy.k.k.p.offset -= len >> 9; + ret = bch2_btree_iter_traverse(dst); + if (ret) + goto bkey_err; + bkey_reassemble(©.k, k); + copy.k.k.p = dst->pos; + copy.k.k.p.offset += copy.k.k.size; ret = bch2_extent_trim_atomic(©.k, dst); if (ret) goto bkey_err; - BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(©.k.k))); + bkey_init(&delete.k); + delete.k.p = src->pos; + bch2_key_resize(&delete.k, copy.k.k.size); - ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size, - bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(©.k)), - BCH_DISK_RESERVATION_NOFAIL); - BUG_ON(ret); + next_pos = delete.k.p; - bch2_trans_begin_updates(&trans); + /* + * If the new and old keys overlap (because we're moving an + * extent that's bigger than the amount we're collapsing by), + * we need to trim the delete key here so they don't overlap + * because overlaps on insertions aren't handled before + * triggers are run, so the overwrite will get double counted + * by the triggers machinery: + */ + if (bkey_cmp(copy.k.k.p, bkey_start_pos(&delete.k)) > 0) { + bch2_cut_front(copy.k.k.p, &delete); - ret = bch2_extent_update(&trans, inode, - &disk_res, NULL, - dst, ©.k, - 0, true, true, NULL); + del = bch2_trans_copy_iter(&trans, src); + BUG_ON(IS_ERR_OR_NULL(del)); + + bch2_btree_iter_set_pos(del, + bkey_start_pos(&delete.k)); + bch2_trans_update(&trans, + BTREE_INSERT_ENTRY(del, &delete)); + } else { + bch2_trans_update(&trans, + BTREE_INSERT_ENTRY(src, &delete)); + } + + bch2_trans_update(&trans, BTREE_INSERT_ENTRY(dst, ©.k)); + + if (copy.k.k.size == k.k->size) { + /* 
+ * If we're moving the entire extent, we can skip + * running triggers: + */ + commit_flags |= BTREE_INSERT_NOMARK; + } else { + /* We might end up splitting compressed extents: */ + unsigned nr_ptrs = + bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(©.k)); + + ret = bch2_disk_reservation_get(c, &disk_res, + copy.k.k.size, nr_ptrs, + BCH_DISK_RESERVATION_NOFAIL); + BUG_ON(ret); + } + + ret = bch2_trans_commit(&trans, &disk_res, + &inode->ei_journal_seq, + commit_flags); bch2_disk_reservation_put(c, &disk_res); bkey_err: + if (del) + bch2_trans_iter_free(&trans, del); + del = NULL; + + if (!ret) + bch2_btree_iter_set_pos(src, next_pos); + if (ret == -EINTR) ret = 0; if (ret) goto err; - /* - * XXX: if we error here we've left data with multiple - * pointers... which isn't a _super_ serious problem... - */ bch2_trans_cond_resched(&trans); } bch2_trans_unlock(&trans); - ret = __bch2_fpunch(c, inode, - round_up(new_size, block_bytes(c)) >> 9, - U64_MAX); - if (ret) - goto err; - i_size_write(&inode->v, new_size); mutex_lock(&inode->ei_update_lock); ret = bch2_write_inode_size(c, inode, new_size, |