author	Kent Overstreet <kent.overstreet@gmail.com>	2019-04-15 14:58:00 -0400
committer	Kent Overstreet <kent.overstreet@linux.dev>	2023-10-22 17:08:20 -0400
commit	c6dd04f8f5644d92361bb2d6e47fa9b4d5af6d79 (patch)
tree	1064fd24845685f95801d72c8211e978dcd94a64
parent	d07343561e263fcbbdb8042f35ca29a602190e18 (diff)
bcachefs: Mark overwrites from journal replay in initial gc
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
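
The change adds a bch2_mark_overwrite() helper (see the buckets.c hunk below) that computes, for each existing key the inserted key overlaps, how many sectors of the old key stop being referenced, so the same logic can be reused both at insert time and when replaying journal keys during initial gc. As a standalone illustration of that sector arithmetic only: ext, classify() and overwrite_delta() below are simplified stand-ins for struct bkey and bch2_extent_overlap(), not the kernel code, and the MIDDLE case is collapsed to its net effect.

#include <assert.h>
#include <stdio.h>

/* [start, end) in sectors; a simplified stand-in for struct bkey. */
struct ext { long long start, end; };

enum overlap { OVERLAP_ALL, OVERLAP_BACK, OVERLAP_FRONT, OVERLAP_MIDDLE };

/* How does the inserted key overlap the old one? (cf. bch2_extent_overlap) */
static enum overlap classify(struct ext ins, struct ext old)
{
	if (ins.start <= old.start && ins.end >= old.end)
		return OVERLAP_ALL;	/* old entirely overwritten */
	if (ins.start <= old.start)
		return OVERLAP_FRONT;	/* front of old overwritten */
	if (ins.end >= old.end)
		return OVERLAP_BACK;	/* back of old overwritten */
	return OVERLAP_MIDDLE;		/* ins punches a hole in old */
}

/* Net sector delta applied to the old key's accounting (always negative). */
static long long overwrite_delta(struct ext ins, struct ext old)
{
	switch (classify(ins, old)) {
	case OVERLAP_ALL:
		return -(old.end - old.start);
	case OVERLAP_BACK:
		return ins.start - old.end;
	case OVERLAP_FRONT:
		return old.start - ins.end;
	case OVERLAP_MIDDLE:
		/* ins lies inside old, so exactly ins's size goes away */
		return -(ins.end - ins.start);
	}
	return 0;
}

int main(void)
{
	assert(overwrite_delta((struct ext){ 0, 8 }, (struct ext){ 0, 8 }) == -8);  /* ALL */
	assert(overwrite_delta((struct ext){ 4, 10 }, (struct ext){ 0, 8 }) == -4); /* BACK */
	assert(overwrite_delta((struct ext){ 0, 6 }, (struct ext){ 4, 10 }) == -2); /* FRONT */
	assert(overwrite_delta((struct ext){ 2, 6 }, (struct ext){ 0, 8 }) == -4);  /* MIDDLE */
	printf("overlap arithmetic ok\n");
	return 0;
}

In the kernel helper the MIDDLE case runs in two steps (the surviving tail of the old key is re-marked as live, then the overwritten span is subtracted), but the net change is the same as shown here: minus the size of the new key.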
-rw-r--r--	fs/bcachefs/btree_gc.c	35
-rw-r--r--	fs/bcachefs/btree_update.h	4
-rw-r--r--	fs/bcachefs/btree_update_leaf.c	46
-rw-r--r--	fs/bcachefs/buckets.c	104
-rw-r--r--	fs/bcachefs/buckets.h	3
-rw-r--r--	fs/bcachefs/recovery.c	107
6 files changed, 192 insertions, 107 deletions
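
The btree_update.h hunk below follows the file's existing flag idiom: an anonymous enum assigns each flag a bit position, and a #define turns that position into a mask, so a new flag such as BTREE_INSERT_NOMARK_OVERWRITES needs only one enum line and one define. A minimal sketch of that idiom with generic FLAG_* names (illustrative, not the bcachefs definitions):

#include <stdio.h>

/* Bit positions, kept in one enum so they stay unique and dense. */
enum {
	__FLAG_JOURNAL_REPLAY,
	__FLAG_NOMARK_OVERWRITES,
	__FLAG_NOMARK,
};

/* Masks built from the positions; callers OR these together. */
#define FLAG_JOURNAL_REPLAY	(1 << __FLAG_JOURNAL_REPLAY)
#define FLAG_NOMARK_OVERWRITES	(1 << __FLAG_NOMARK_OVERWRITES)
#define FLAG_NOMARK		(1 << __FLAG_NOMARK)

int main(void)
{
	unsigned flags = FLAG_JOURNAL_REPLAY | FLAG_NOMARK;

	/* The recovery.c hunk below flips its commit flags the same way: */
	flags &= ~FLAG_JOURNAL_REPLAY;
	flags &= ~FLAG_NOMARK;
	flags |= FLAG_NOMARK_OVERWRITES;

	printf("flags = %#x\n", flags);
	return 0;
}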
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index cf0a2f4b22af..2650f60b7cd7 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -273,11 +273,40 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
 		(int) btree_id_to_gc_phase(r);
 }
 
+static int mark_journal_key(struct bch_fs *c, enum btree_id id,
+			    struct bkey_i *insert)
+{
+	struct btree_trans trans;
+	struct btree_iter *iter;
+	struct bkey_s_c k;
+	u8 max_stale;
+	int ret = 0;
+
+	ret = bch2_gc_mark_key(c, bkey_i_to_s_c(insert), &max_stale, true);
+	if (ret)
+		return ret;
+
+	bch2_trans_init(&trans, c);
+
+	for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
+			   BTREE_ITER_SLOTS, k) {
+		percpu_down_read(&c->mark_lock);
+		ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL,
+					  BCH_BUCKET_MARK_GC|
+					  BCH_BUCKET_MARK_NOATOMIC);
+		percpu_up_read(&c->mark_lock);
+
+		if (!ret)
+			break;
+	}
+
+	return bch2_trans_exit(&trans);
+}
+
 static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
 			  bool initial, bool metadata_only)
 {
 	enum btree_id ids[BTREE_ID_NR];
-	u8 max_stale;
 	unsigned i;
 
 	for (i = 0; i < BTREE_ID_NR; i++)
@@ -299,9 +328,7 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
 
 			for_each_journal_key(*journal_keys, j)
 				if (j->btree_id == id) {
-					ret = bch2_gc_mark_key(c,
-						bkey_i_to_s_c(j->k),
-						&max_stale, initial);
+					ret = mark_journal_key(c, id, j->k);
 					if (ret)
 						return ret;
 				}
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 75ed02874767..7a638a76634f 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -43,6 +43,7 @@ enum {
 	__BTREE_INSERT_USE_ALLOC_RESERVE,
 	__BTREE_INSERT_JOURNAL_REPLAY,
 	__BTREE_INSERT_JOURNAL_RESERVED,
+	__BTREE_INSERT_NOMARK_OVERWRITES,
 	__BTREE_INSERT_NOMARK,
 	__BTREE_INSERT_NOWAIT,
 	__BTREE_INSERT_GC_LOCK_HELD,
@@ -76,6 +77,9 @@ enum {
 
 #define BTREE_INSERT_JOURNAL_RESERVED	(1 << __BTREE_INSERT_JOURNAL_RESERVED)
 
+/* Don't mark overwrites, just new key: */
+#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
+
 /* Don't call bch2_mark_key: */
 #define BTREE_INSERT_NOMARK		(1 << __BTREE_INSERT_NOMARK)
 
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 48d3be517471..2633a5452b13 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -542,20 +542,22 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 
 	btree_trans_lock_write(c, trans);
 
-	trans_for_each_update_iter(trans, i) {
-		if (i->deferred ||
-		    !btree_node_type_needs_gc(i->iter->btree_id))
-			continue;
+	if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
+		trans_for_each_update_iter(trans, i) {
+			if (i->deferred ||
+			    !btree_node_type_needs_gc(i->iter->btree_id))
+				continue;
 
-		if (!fs_usage) {
-			percpu_down_read(&c->mark_lock);
-			fs_usage = bch2_fs_usage_scratch_get(c);
-		}
+			if (!fs_usage) {
+				percpu_down_read(&c->mark_lock);
+				fs_usage = bch2_fs_usage_scratch_get(c);
+			}
 
-		if (!bch2_bkey_replicas_marked_locked(c,
-			bkey_i_to_s_c(i->k), true)) {
-			ret = BTREE_INSERT_NEED_MARK_REPLICAS;
-			goto out;
+			if (!bch2_bkey_replicas_marked_locked(c,
+					bkey_i_to_s_c(i->k), true)) {
+				ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+				goto out;
+			}
 		}
 	}
 
@@ -602,16 +604,18 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 			linked->flags |= BTREE_ITER_NOUNLOCK;
 	}
 
-	trans_for_each_update_iter(trans, i)
-		bch2_mark_update(trans, i, fs_usage, 0);
-	if (fs_usage)
-		bch2_trans_fs_usage_apply(trans, fs_usage);
-
-	if (unlikely(c->gc_pos.phase)) {
+	if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
 		trans_for_each_update_iter(trans, i)
-			if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
-				bch2_mark_update(trans, i, NULL,
-						 BCH_BUCKET_MARK_GC);
+			bch2_mark_update(trans, i, fs_usage, 0);
+		if (fs_usage)
+			bch2_trans_fs_usage_apply(trans, fs_usage);
+
+		if (unlikely(c->gc_pos.phase)) {
+			trans_for_each_update_iter(trans, i)
+				if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
+					bch2_mark_update(trans, i, NULL,
+							 BCH_BUCKET_MARK_GC);
+		}
 	}
 
 	trans_for_each_update(trans, i)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 4fe66ee1f745..7a05ba5fd589 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1035,6 +1035,56 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 	return ret;
 }
 
+inline bool bch2_mark_overwrite(struct btree_trans *trans,
+				struct btree_iter *iter,
+				struct bkey_s_c old,
+				struct bkey_i *new,
+				struct bch_fs_usage *fs_usage,
+				unsigned flags)
+{
+	struct bch_fs *c = trans->c;
+	struct btree *b = iter->l[0].b;
+	s64 sectors = 0;
+
+	if (btree_node_is_extents(b)
+	    ? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0
+	    : bkey_cmp(new->k.p, old.k->p))
+		return false;
+
+	if (btree_node_is_extents(b)) {
+		switch (bch2_extent_overlap(&new->k, old.k)) {
+		case BCH_EXTENT_OVERLAP_ALL:
+			sectors = -((s64) old.k->size);
+			break;
+		case BCH_EXTENT_OVERLAP_BACK:
+			sectors = bkey_start_offset(&new->k) -
+				old.k->p.offset;
+			break;
+		case BCH_EXTENT_OVERLAP_FRONT:
+			sectors = bkey_start_offset(old.k) -
+				new->k.p.offset;
+			break;
+		case BCH_EXTENT_OVERLAP_MIDDLE:
+			sectors = old.k->p.offset - new->k.p.offset;
+			BUG_ON(sectors <= 0);
+
+			bch2_mark_key_locked(c, old, true, sectors,
+				fs_usage, trans->journal_res.seq,
+				flags);
+
+			sectors = bkey_start_offset(&new->k) -
+				old.k->p.offset;
+			break;
+		}
+
+		BUG_ON(sectors >= 0);
+	}
+
+	bch2_mark_key_locked(c, old, false, sectors,
+		fs_usage, trans->journal_res.seq, flags);
+	return true;
+}
+
 void bch2_mark_update(struct btree_trans *trans,
 		      struct btree_insert_entry *insert,
 		      struct bch_fs_usage *fs_usage,
@@ -1049,57 +1099,23 @@ void bch2_mark_update(struct btree_trans *trans,
 	if (!btree_node_type_needs_gc(iter->btree_id))
 		return;
 
-	if (!(trans->flags & BTREE_INSERT_NOMARK))
-		bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
-			bpos_min(insert->k->k.p, b->key.k.p).offset -
-			bkey_start_offset(&insert->k->k),
-			fs_usage, trans->journal_res.seq, flags);
+	bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
+		bpos_min(insert->k->k.p, b->key.k.p).offset -
+		bkey_start_offset(&insert->k->k),
+		fs_usage, trans->journal_res.seq, flags);
+
+	if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
+		return;
 
 	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
 						      KEY_TYPE_discard))) {
 		struct bkey unpacked;
-		struct bkey_s_c k;
-		s64 sectors = 0;
+		struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
 
-		k = bkey_disassemble(b, _k, &unpacked);
-
-		if (btree_node_is_extents(b)
-		    ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
-		    : bkey_cmp(insert->k->k.p, k.k->p))
+		if (!bch2_mark_overwrite(trans, iter, k, insert->k,
+					 fs_usage, flags))
 			break;
 
-		if (btree_node_is_extents(b)) {
-			switch (bch2_extent_overlap(&insert->k->k, k.k)) {
-			case BCH_EXTENT_OVERLAP_ALL:
-				sectors = -((s64) k.k->size);
-				break;
-			case BCH_EXTENT_OVERLAP_BACK:
-				sectors = bkey_start_offset(&insert->k->k) -
-					k.k->p.offset;
-				break;
-			case BCH_EXTENT_OVERLAP_FRONT:
-				sectors = bkey_start_offset(k.k) -
-					insert->k->k.p.offset;
-				break;
-			case BCH_EXTENT_OVERLAP_MIDDLE:
-				sectors = k.k->p.offset - insert->k->k.p.offset;
-				BUG_ON(sectors <= 0);
-
-				bch2_mark_key_locked(c, k, true, sectors,
-					fs_usage, trans->journal_res.seq,
-					flags);
-
-				sectors = bkey_start_offset(&insert->k->k) -
-					k.k->p.offset;
-				break;
-			}
-
-			BUG_ON(sectors >= 0);
-		}
-
-		bch2_mark_key_locked(c, k, false, sectors,
-			fs_usage, trans->journal_res.seq, flags);
-
 		bch2_btree_node_iter_advance(&node_iter, b);
 	}
 }
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 095015f17f76..90fffee1c289 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -254,6 +254,9 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
 int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
 			struct disk_reservation *);
 
+bool bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
+			 struct bkey_s_c, struct bkey_i *,
+			 struct bch_fs_usage *, unsigned);
 void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
 		      struct bch_fs_usage *, unsigned);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 5bfb38c4290f..d207ff7b98f4 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -203,63 +203,94 @@ static void replay_now_at(struct journal *j, u64 seq)
 static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter *iter, *split_iter;
 	/*
-	 * We might cause compressed extents to be
-	 * split, so we need to pass in a
-	 * disk_reservation:
+	 * We might cause compressed extents to be split, so we need to pass in
+	 * a disk_reservation:
 	 */
 	struct disk_reservation disk_res =
 		bch2_disk_reservation_init(c, 0);
-	BKEY_PADDED(k) split;
+	struct bkey_i *split;
+	bool split_compressed = false;
+	unsigned flags = BTREE_INSERT_ATOMIC|
+		BTREE_INSERT_NOFAIL|
+		BTREE_INSERT_LAZY_RW|
+		BTREE_INSERT_JOURNAL_REPLAY|
+		BTREE_INSERT_NOMARK;
 	int ret;
 
 	bch2_trans_init(&trans, c);
+	bch2_trans_preload_iters(&trans);
+retry:
+	bch2_trans_begin(&trans);
 
 	iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
 				   bkey_start_pos(&k->k),
 				   BTREE_ITER_INTENT);
+
 	do {
 		ret = bch2_btree_iter_traverse(iter);
 		if (ret)
-			break;
+			goto err;
 
-		bkey_copy(&split.k, k);
-		bch2_cut_front(iter->pos, &split.k);
-		bch2_extent_trim_atomic(&split.k, iter);
-
-		ret = bch2_disk_reservation_add(c, &disk_res,
-				split.k.k.size *
-				bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&split.k)),
-				BCH_DISK_RESERVATION_NOFAIL);
-		BUG_ON(ret);
-
-		bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &split.k));
-		ret = bch2_trans_commit(&trans, &disk_res, NULL,
-					BTREE_INSERT_ATOMIC|
-					BTREE_INSERT_NOFAIL|
-					BTREE_INSERT_LAZY_RW|
-					BTREE_INSERT_JOURNAL_REPLAY);
-	} while ((!ret || ret == -EINTR) &&
-		 bkey_cmp(k->k.p, iter->pos));
+		split_iter = bch2_trans_copy_iter(&trans, iter);
+		ret = PTR_ERR_OR_ZERO(split_iter);
+		if (ret)
+			goto err;
 
-	bch2_disk_reservation_put(c, &disk_res);
+		split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
+		ret = PTR_ERR_OR_ZERO(split);
+		if (ret)
+			goto err;
 
-	/*
-	 * This isn't strictly correct - we should only be relying on the btree
-	 * node lock for synchronization with gc when we've got a write lock
-	 * held.
-	 *
-	 * but - there are other correctness issues if btree gc were to run
-	 * before journal replay finishes
-	 */
-	BUG_ON(c->gc_pos.phase);
+		if (!split_compressed &&
+		    bch2_extent_is_compressed(bkey_i_to_s_c(k)) &&
+		    !bch2_extent_is_atomic(k, split_iter)) {
+			ret = bch2_disk_reservation_add(c, &disk_res,
+					k->k.size *
+					bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(k)),
+					BCH_DISK_RESERVATION_NOFAIL);
+			BUG_ON(ret);
+
+			flags &= ~BTREE_INSERT_JOURNAL_REPLAY;
+			flags &= ~BTREE_INSERT_NOMARK;
+			flags |= BTREE_INSERT_NOMARK_OVERWRITES;
+			split_compressed = true;
+		}
 
-	bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
-		      NULL, 0, 0);
-	bch2_trans_exit(&trans);
+		bkey_copy(split, k);
+		bch2_cut_front(split_iter->pos, split);
+		bch2_extent_trim_atomic(split, split_iter);
 
-	return ret;
+		bch2_trans_update(&trans, BTREE_INSERT_ENTRY(split_iter, split));
+		bch2_btree_iter_set_pos(iter, split->k.p);
+	} while (bkey_cmp(iter->pos, k->k.p) < 0);
+
+	ret = bch2_trans_commit(&trans, &disk_res, NULL, flags);
+	if (ret)
+		goto err;
+
+	if (split_compressed) {
+		/*
+		 * This isn't strictly correct - we should only be relying on
+		 * the btree node lock for synchronization with gc when we've
+		 * got a write lock held.
+		 *
+		 * but - there are other correctness issues if btree gc were to
+		 * run before journal replay finishes
+		 */
+		BUG_ON(c->gc_pos.phase);
+
+		bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
+			      NULL, 0, 0);
+	}
+err:
+	if (ret == -EINTR)
+		goto retry;
+
+	bch2_disk_reservation_put(c, &disk_res);
+
+	return bch2_trans_exit(&trans) ?: ret;
 }
 
 static int bch2_journal_replay(struct bch_fs *c,
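
The reworked bch2_extent_replay_key() above queues all the splits of one journal key in a single transaction, commits once, and on -EINTR restarts the whole pass from the retry label. A minimal standalone sketch of that retry shape; trans_begin(), trans_commit() and the injected failure are hypothetical stand-ins, not the bch2_trans_* API:

#include <errno.h>
#include <stdio.h>

struct trans { int commits; };

/* Stand-in for bch2_trans_begin(): drop updates queued by an aborted pass. */
static void trans_begin(struct trans *t)
{
	(void) t;
}

/* Stand-in for bch2_trans_commit(): pretend the first attempt races. */
static int trans_commit(struct trans *t)
{
	return t->commits++ < 1 ? -EINTR : 0;
}

static int replay_one_key(struct trans *t)
{
	int ret;
retry:
	trans_begin(t);

	/* ... queue every atomic split of the key here ... */

	ret = trans_commit(t);
	if (ret == -EINTR)
		goto retry;	/* lock restart: rebuild the batch and retry */
	return ret;
}

int main(void)
{
	struct trans t = { 0 };
	int ret = replay_one_key(&t);

	printf("ret=%d after %d commit attempt(s)\n", ret, t.commits);
	return 0;
}

Committing once per key, instead of once per split as the old loop did, means either every split lands or none do, which is what lets the NOMARK/NOMARK_OVERWRITES accounting above stay consistent across a restart.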