author    Kent Overstreet <kent.overstreet@gmail.com> 2016-07-21 19:05:06 -0800
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:08:09 -0400
commit    271a3d3a4b30dcd9fd274a923fb382f5f113d279 (patch)
tree      b16887dfafd9b011a14ae842f1029ce1211c9aa5 /fs
parent    0fdf18047fd38e7b5cc6adba3a81704c88333e1c (diff)
bcachefs: lift ordering restriction on 0 size extents
This lifts the restriction that 0 size extents must not overlap with other
extents, which means we can now sort extents and non-extents the same way,
and will let us simplify a bunch of other stuff as well.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
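For illustration, a minimal standalone sketch of the comparison rule this
change unifies (compare the new __btree_node_iter_cmp() in the bset.h hunk
below). The names here (demo_key, demo_iter_cmp) are hypothetical stand-ins,
not real bcachefs structures: with the ordering restriction lifted, extents
no longer need deleted (0 size) keys to sort last, so extents and
non-extents share one rule where equal-position deleted keys come first.

/*
 * Stand-in key: pos approximates struct bpos, deleted approximates
 * bkey_deleted(). Builds with GCC/Clang (uses the a ?: b extension,
 * as the kernel sources do).
 */
struct demo_key {
	long long	pos;
	int		deleted;
};

static inline int demo_iter_cmp(const struct demo_key *l,
				const struct demo_key *r)
{
	int cmp = (l->pos > r->pos) - (l->pos < r->pos);

	/* when positions compare equal, deleted keys sort first */
	return cmp
		?: (r->deleted - l->deleted)
		?: ((l > r) - (l < r));	/* tie-break on address: total order */
}

The real comparator additionally compares packed keys against the node's
key format via bkey_cmp_packed(), as in the patch below.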
Diffstat (limited to 'fs')
-rw-r--r--   fs/bcachefs/bset.c                  | 193
-rw-r--r--   fs/bcachefs/bset.h                  |  68
-rw-r--r--   fs/bcachefs/btree_gc.c              |   2
-rw-r--r--   fs/bcachefs/btree_io.c              |  10
-rw-r--r--   fs/bcachefs/btree_io.h              |   9
-rw-r--r--   fs/bcachefs/btree_iter.c            | 175
-rw-r--r--   fs/bcachefs/btree_types.h           |   5
-rw-r--r--   fs/bcachefs/btree_update_interior.c |   4
-rw-r--r--   fs/bcachefs/btree_update_leaf.c     |   9
-rw-r--r--   fs/bcachefs/extents.c               | 547
10 files changed, 475 insertions, 547 deletions
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index cf83911b3f5d..27fa3e230e6e 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -18,6 +18,9 @@
#include <linux/random.h>
#include <linux/prefetch.h>
+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
+ struct btree *);
+
struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
{
unsigned offset = __btree_node_key_to_offset(b, k);
@@ -63,8 +66,8 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set)
_n = bkey_next(_k);
bch2_bkey_to_text(buf, sizeof(buf), &k);
- printk(KERN_ERR "block %u key %zi/%u: %s\n", set,
- _k->_data - i->_data, i->u64s, buf);
+ printk(KERN_ERR "block %u key %5u: %s\n", set,
+ __btree_node_key_to_offset(b, _k), buf);
if (_n == vstruct_last(i))
continue;
@@ -120,20 +123,6 @@ void bch2_dump_btree_node_iter(struct btree *b,
#ifdef CONFIG_BCACHEFS_DEBUG
-static bool keys_out_of_order(struct btree *b,
- const struct bkey_packed *prev,
- const struct bkey_packed *next,
- bool is_extents)
-{
- struct bkey nextu = bkey_unpack_key(b, next);
-
- return bkey_cmp_left_packed_byval(b, prev, bkey_start_pos(&nextu)) > 0 ||
- ((is_extents
- ? !bkey_deleted(next)
- : !bkey_deleted(prev)) &&
- !bkey_cmp_packed(b, prev, next));
-}
-
void __bch2_verify_btree_nr_keys(struct btree *b)
{
struct bset_tree *t;
@@ -150,16 +139,21 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
-static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
- struct btree *b,
- struct bkey_packed *k)
+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
+ struct btree *b)
{
- const struct bkey_packed *n = bch2_btree_node_iter_peek_all(iter, b);
+ struct btree_node_iter iter = *_iter;
+ const struct bkey_packed *k, *n;
+
+ k = bch2_btree_node_iter_peek_all(&iter, b);
+ __bch2_btree_node_iter_advance(&iter, b);
+ n = bch2_btree_node_iter_peek_all(&iter, b);
bkey_unpack_key(b, k);
if (n &&
- keys_out_of_order(b, k, n, iter->is_extents)) {
+ __btree_node_iter_cmp(b, k, n) > 0) {
+ struct btree_node_iter_set *set;
struct bkey ku = bkey_unpack_key(b, k);
struct bkey nu = bkey_unpack_key(b, n);
char buf1[80], buf2[80];
@@ -167,12 +161,22 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
bch2_dump_btree_node(b);
bch2_bkey_to_text(buf1, sizeof(buf1), &ku);
bch2_bkey_to_text(buf2, sizeof(buf2), &nu);
- panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2);
+ printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
+ buf1, buf2);
+ printk(KERN_ERR "iter was:");
+
+ btree_node_iter_for_each(_iter, set) {
+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
+ printk(" [%zi %zi]", t - b->set,
+ k->_data - bset(b, t)->_data);
+ }
+ panic("\n");
}
}
void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
- struct btree *b)
+ struct btree *b)
{
struct btree_node_iter_set *set, *s2;
struct bset_tree *t;
@@ -196,72 +200,72 @@ found:
/* Verify iterator is sorted: */
btree_node_iter_for_each(iter, set)
BUG_ON(set != iter->data &&
- btree_node_iter_cmp(iter, b, set[-1], set[0]) > 0);
+ btree_node_iter_cmp(b, set[-1], set[0]) > 0);
}
-void bch2_verify_key_order(struct btree *b,
- struct btree_node_iter *iter,
- struct bkey_packed *where)
+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
+ struct bkey_packed *insert, unsigned clobber_u64s)
{
struct bset_tree *t = bch2_bkey_to_bset(b, where);
- struct bkey_packed *k, *prev;
- struct bkey uk, uw = bkey_unpack_key(b, where);
-
- k = bch2_bkey_prev_all(b, t, where);
- if (k &&
- keys_out_of_order(b, k, where, iter->is_extents)) {
- char buf1[100], buf2[100];
+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
+ struct bkey_packed *next = (void *) (where->_data + clobber_u64s);
+#if 0
+ BUG_ON(prev &&
+ __btree_node_iter_cmp(b, prev, insert) > 0);
+#else
+ if (prev &&
+ __btree_node_iter_cmp(b, prev, insert) > 0) {
+ struct bkey k1 = bkey_unpack_key(b, prev);
+ struct bkey k2 = bkey_unpack_key(b, insert);
+ char buf1[100];
+ char buf2[100];
bch2_dump_btree_node(b);
- uk = bkey_unpack_key(b, k);
- bch2_bkey_to_text(buf1, sizeof(buf1), &uk);
- bch2_bkey_to_text(buf2, sizeof(buf2), &uw);
- panic("out of order with prev:\n%s\n%s\n",
- buf1, buf2);
+ bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
+ bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
+
+ panic("prev > insert:\n"
+ "prev key %5u %s\n"
+ "insert key %5u %s\n",
+ __btree_node_key_to_offset(b, prev), buf1,
+ __btree_node_key_to_offset(b, insert), buf2);
}
+#endif
+#if 0
+ BUG_ON(next != btree_bkey_last(b, t) &&
+ __btree_node_iter_cmp(b, insert, next) > 0);
+#else
+ if (next != btree_bkey_last(b, t) &&
+ __btree_node_iter_cmp(b, insert, next) > 0) {
+ struct bkey k1 = bkey_unpack_key(b, insert);
+ struct bkey k2 = bkey_unpack_key(b, next);
+ char buf1[100];
+ char buf2[100];
- k = bkey_next(where);
- BUG_ON(k != btree_bkey_last(b, t) &&
- keys_out_of_order(b, where, k, iter->is_extents));
-
- for_each_bset(b, t) {
- if (where >= btree_bkey_first(b, t) ||
- where < btree_bkey_last(b, t))
- continue;
-
- k = bch2_btree_node_iter_bset_pos(iter, b, t);
-
- if (k == btree_bkey_last(b, t))
- k = bch2_bkey_prev_all(b, t, k);
-
- while (bkey_cmp_left_packed_byval(b, k, bkey_start_pos(&uw)) > 0 &&
- (prev = bch2_bkey_prev_all(b, t, k)))
- k = prev;
-
- for (;
- k != btree_bkey_last(b, t);
- k = bkey_next(k)) {
- uk = bkey_unpack_key(b, k);
-
- if (iter->is_extents) {
- BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
- bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
- } else {
- BUG_ON(!bkey_cmp(uw.p, uk.p) &&
- !bkey_deleted(&uk));
- }
-
- if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0)
- break;
- }
+ bch2_dump_btree_node(b);
+ bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
+ bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
+
+ panic("insert > next:\n"
+ "insert key %5u %s\n"
+ "next key %5u %s\n",
+ __btree_node_key_to_offset(b, insert), buf1,
+ __btree_node_key_to_offset(b, next), buf2);
}
+#endif
+}
+
+void bch2_verify_key_order(struct btree *b,
+ struct btree_node_iter *_iter,
+ struct bkey_packed *where)
+{
+ bch2_verify_insert_pos(b, where, where, where->u64s);
}
#else
static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
- struct btree *b,
- struct bkey_packed *k) {}
+ struct btree *b) {}
#endif
@@ -1229,6 +1233,7 @@ void bch2_bset_insert(struct btree *b,
struct bkey_packed packed, *src = bkey_to_packed(insert);
bch2_bset_verify_rw_aux_tree(b, t);
+ bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s);
if (bch2_bkey_pack_key(&packed, &insert->k, f))
src = &packed;
@@ -1255,7 +1260,6 @@ void bch2_bset_insert(struct btree *b,
bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
- bch2_verify_key_order(b, iter, where);
bch2_verify_btree_nr_keys(b);
}
@@ -1461,7 +1465,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
noinline __flatten __attribute__((cold))
static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
struct btree *b, struct bpos search,
- bool strictly_greater, bool is_extents)
+ bool strictly_greater)
{
struct bset_tree *t;
@@ -1518,7 +1522,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
*/
void bch2_btree_node_iter_init(struct btree_node_iter *iter,
struct btree *b, struct bpos search,
- bool strictly_greater, bool is_extents)
+ bool strictly_greater)
{
struct bset_tree *t;
struct bkey_packed p, *packed_search = NULL;
@@ -1526,7 +1530,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
EBUG_ON(bkey_cmp(search, b->data->min_key) < 0);
bset_aux_tree_verify(b);
- __bch2_btree_node_iter_init(iter, is_extents);
+ memset(iter, 0, sizeof(*iter));
switch (bch2_bkey_pack_pos_lossy(&p, search, b)) {
case BKEY_PACK_POS_EXACT:
@@ -1537,7 +1541,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
break;
case BKEY_PACK_POS_FAIL:
btree_node_iter_init_pack_failed(iter, b, search,
- strictly_greater, is_extents);
+ strictly_greater);
return;
}
@@ -1552,12 +1556,11 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
}
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter,
- struct btree *b,
- bool is_extents)
+ struct btree *b)
{
struct bset_tree *t;
- __bch2_btree_node_iter_init(iter, is_extents);
+ memset(iter, 0, sizeof(*iter));
for_each_bset(b, t)
__bch2_btree_node_iter_push(iter, b,
@@ -1585,7 +1588,7 @@ static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
{
bool ret;
- if ((ret = (btree_node_iter_cmp(iter, b,
+ if ((ret = (btree_node_iter_cmp(b,
iter->data[first],
iter->data[first + 1]) > 0)))
swap(iter->data[first], iter->data[first + 1]);
@@ -1640,23 +1643,14 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
btree_node_iter_sort_two(iter, b, 1);
}
-/**
- * bch_btree_node_iter_advance - advance @iter by one key
- *
- * Doesn't do debugchecks - for cases where (insert_fixup_extent()) a bset might
- * momentarily have out of order extents.
- */
void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
struct btree *b)
{
#ifdef CONFIG_BCACHEFS_DEBUG
- struct bkey_packed *k = bch2_btree_node_iter_peek_all(iter, b);
-
- __bch2_btree_node_iter_advance(iter, b);
- bch2_btree_node_iter_next_check(iter, b, k);
-#else
- __bch2_btree_node_iter_advance(iter, b);
+ bch2_btree_node_iter_verify(iter, b);
+ bch2_btree_node_iter_next_check(iter, b);
#endif
+ __bch2_btree_node_iter_advance(iter, b);
}
static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
@@ -1689,8 +1683,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
bch2_btree_node_iter_bset_pos(iter, b, t),
min_key_type);
if (k &&
- (!prev || __btree_node_iter_cmp(iter->is_extents, b,
- k, prev) > 0)) {
+ (!prev || __btree_node_iter_cmp(b, k, prev) > 0)) {
prev = k;
end = t->end_offset;
}
@@ -1723,11 +1716,11 @@ out:
struct btree_node_iter iter2 = *iter;
if (prev)
- bch2_btree_node_iter_advance(&iter2, b);
+ __bch2_btree_node_iter_advance(&iter2, b);
while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) {
BUG_ON(k->type >= min_key_type);
- bch2_btree_node_iter_advance(&iter2, b);
+ __bch2_btree_node_iter_advance(&iter2, b);
}
}
diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h
index 2fa71d7c0e8a..0787030ccc7e 100644
--- a/fs/bcachefs/bset.h
+++ b/fs/bcachefs/bset.h
@@ -370,6 +370,17 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b,
}
/* Returns true if @k is after iterator position @pos */
+static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
+ const struct bkey *k)
+{
+ int cmp = bkey_cmp(k->p, iter->pos);
+
+ return cmp > 0 ||
+ (cmp == 0 &&
+ !(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
+}
+
+/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp_packed(const struct btree *b,
struct bpos *pos,
const struct bkey_packed *k,
@@ -419,7 +430,7 @@ enum bch_extent_overlap {
/* Returns how k overlaps with m */
static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
- const struct bkey *m)
+ const struct bkey *m)
{
int cmp1 = bkey_cmp(k->p, m->p) < 0;
int cmp2 = bkey_cmp(bkey_start_pos(k),
@@ -430,20 +441,13 @@ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
/* Btree key iteration */
-static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter,
- bool is_extents)
-{
- iter->is_extents = is_extents;
- memset(iter->data, 0, sizeof(iter->data));
-}
-
void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
const struct bkey_packed *,
const struct bkey_packed *);
void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
- struct bpos, bool, bool);
+ struct bpos, bool);
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
- struct btree *, bool);
+ struct btree *);
struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
struct btree *,
struct bset_tree *);
@@ -470,32 +474,21 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
return __btree_node_iter_set_end(iter, 0);
}
-static inline int __btree_node_iter_cmp(bool is_extents,
- struct btree *b,
- struct bkey_packed *l,
- struct bkey_packed *r)
+static inline int __btree_node_iter_cmp(struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
{
- /*
- * For non extents, when keys compare equal the deleted keys have to
- * come first - so that bch2_btree_node_iter_next_check() can detect
- * duplicate nondeleted keys (and possibly other reasons?)
- *
- * For extents, bkey_deleted() is used as a proxy for k->size == 0, so
- * deleted keys have to sort last.
- */
+ /* When keys compare equal deleted keys come first */
return bkey_cmp_packed(b, l, r)
- ?: (is_extents
- ? (int) bkey_deleted(l) - (int) bkey_deleted(r)
- : (int) bkey_deleted(r) - (int) bkey_deleted(l))
+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
?: (l > r) - (l < r);
}
-static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
- struct btree *b,
+static inline int btree_node_iter_cmp(struct btree *b,
struct btree_node_iter_set l,
struct btree_node_iter_set r)
{
- return __btree_node_iter_cmp(iter->is_extents, b,
+ return __btree_node_iter_cmp(b,
__btree_node_offset_to_key(b, l.k),
__btree_node_offset_to_key(b, r.k));
}
@@ -582,21 +575,12 @@ bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
}
-/*
- * Iterates over all _live_ keys - skipping deleted (and potentially
- * overlapping) keys
- */
-#define for_each_btree_node_key(b, k, iter, _is_extents) \
- for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
- ((k) = bch2_btree_node_iter_peek(iter, b)); \
- bch2_btree_node_iter_advance(iter, b))
-
struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
struct btree *,
struct bkey *);
-#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\
- for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
(k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
bch2_btree_node_iter_advance(iter, b))
@@ -646,6 +630,8 @@ void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
void __bch2_verify_btree_nr_keys(struct btree *);
void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
+ struct bkey_packed *, unsigned);
void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
struct bkey_packed *);
@@ -654,6 +640,10 @@ void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b) {}
+static inline void bch2_verify_insert_pos(struct btree *b,
+ struct bkey_packed *where,
+ struct bkey_packed *insert,
+ unsigned clobber_u64s) {}
static inline void bch2_verify_key_order(struct btree *b,
struct btree_node_iter *iter,
struct bkey_packed *where) {}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 1fbb9c657fc6..2526118fe9ce 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -217,7 +217,6 @@ static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
if (btree_node_has_ptrs(b))
for_each_btree_node_key_unpack(b, k, &iter,
- btree_node_is_extents(b),
&unpacked) {
bch2_bkey_debugcheck(c, b, k);
stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
@@ -1044,7 +1043,6 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
struct bkey_s_c k;
for_each_btree_node_key_unpack(b, k, &node_iter,
- btree_node_is_extents(b),
&unpacked) {
ret = bch2_btree_mark_key_initial(c,
btree_node_type(b), k);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 5c36acef2b13..889870582566 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -22,7 +22,7 @@
/* btree_node_iter_large: */
#define btree_node_iter_cmp_heap(h, _l, _r) \
- __btree_node_iter_cmp((iter)->is_extents, b, \
+ __btree_node_iter_cmp(b, \
__btree_node_offset_to_key(b, (_l).k), \
__btree_node_offset_to_key(b, (_r).k))
@@ -248,6 +248,9 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
sort_iter_sort(iter, sort_extent_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
+ if (bkey_deleted(in))
+ continue;
+
EBUG_ON(bkeyp_val_u64s(f, in));
EBUG_ON(in->type != KEY_TYPE_DISCARD);
@@ -785,8 +788,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
bch2_bset_set_no_aux_tree(dst, dst->set);
- bch2_btree_node_iter_init_from_start(&src_iter, src,
- btree_node_is_extents(src));
+ bch2_btree_node_iter_init_from_start(&src_iter, src);
if (btree_node_ops(src)->key_normalize ||
btree_node_ops(src)->key_merge)
@@ -1171,7 +1173,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
int ret, retry_read = 0, write = READ;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
- __bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b));
+ iter->used = 0;
if (bch2_meta_read_fault("btree"))
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 0688ce420610..7835f8a9e3a0 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -146,20 +146,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
/* Sorting */
struct btree_node_iter_large {
- u8 is_extents;
u16 used;
struct btree_node_iter_set data[MAX_BSETS];
};
-static inline void
-__bch2_btree_node_iter_large_init(struct btree_node_iter_large *iter,
- bool is_extents)
-{
- iter->used = 0;
- iter->is_extents = is_extents;
-}
-
void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *,
struct btree *);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 8918268f99f4..9d92826181dc 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -375,14 +375,20 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree_node_iter tmp = l->iter;
struct bkey_packed *k;
+ if (iter->uptodate > BTREE_ITER_NEED_PEEK)
+ return;
+
bch2_btree_node_iter_verify(&l->iter, b);
/*
* For interior nodes, the iterator will have skipped past
* deleted keys:
+ *
+ * For extents, the iterator may have skipped past deleted keys (but not
+ * whiteouts)
*/
- k = b->level
- ? bch2_btree_node_iter_prev(&tmp, b)
+ k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
+ ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD)
: bch2_btree_node_iter_prev_all(&tmp, b);
if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k,
iter->flags & BTREE_ITER_IS_EXTENTS)) {
@@ -390,7 +396,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk);
- panic("prev key should be before after pos:\n%s\n%llu:%llu\n",
+ panic("prev key should be before iter pos:\n%s\n%llu:%llu\n",
buf, iter->pos.inode, iter->pos.offset);
}
@@ -401,15 +407,16 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk);
- panic("next key should be before iter pos:\n%llu:%llu\n%s\n",
+ panic("iter should be after current key:\n"
+ "iter pos %llu:%llu\n"
+ "cur key %s\n",
iter->pos.inode, iter->pos.offset, buf);
}
- if (iter->uptodate == BTREE_ITER_UPTODATE &&
- (iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) {
- BUG_ON(!bkey_whiteout(&iter->k) &&
- bch2_btree_node_iter_end(&l->iter));
- }
+ BUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
+ (iter->flags & BTREE_ITER_TYPE) == BTREE_ITER_KEYS &&
+ !bkey_whiteout(&iter->k) &&
+ bch2_btree_node_iter_end(&l->iter));
}
void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
@@ -420,6 +427,11 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
__bch2_btree_iter_verify(linked, b);
}
+#else
+
+static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
+ struct btree *b) {}
+
#endif
static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
@@ -434,7 +446,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
struct btree_node_iter_set *set;
unsigned offset = __btree_node_key_to_offset(b, where);
int shift = new_u64s - clobber_u64s;
- unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift;
+ unsigned old_end = t->end_offset - shift;
btree_node_iter_for_each(node_iter, set)
if (set->end == old_end)
@@ -456,7 +468,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
}
return;
found:
- set->end = (int) set->end + shift;
+ set->end = t->end_offset;
/* Iterator hasn't gotten to the key that changed yet: */
if (set->k < offset)
@@ -517,8 +529,7 @@ iter_current_key_not_modified:
k = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(node_iter, b, t));
if (k &&
- __btree_node_iter_cmp(node_iter, b,
- k, where) > 0) {
+ __btree_node_iter_cmp(b, k, where) > 0) {
struct btree_node_iter_set *set;
unsigned offset =
__btree_node_key_to_offset(b, bkey_next(k));
@@ -557,10 +568,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
__bch2_btree_node_iter_fix(linked, b,
&linked->l[b->level].iter, t,
where, clobber_u64s, new_u64s);
-
- /* interior node iterators are... special... */
- if (!b->level)
- bch2_btree_iter_verify(iter, b);
}
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
@@ -647,17 +654,6 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
btree_node_unlock(iter, b->level + 1);
}
-/* Returns true if @k is after iterator position @pos */
-static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
- const struct bkey *k)
-{
- int cmp = bkey_cmp(k->p, iter->pos);
-
- return cmp > 0 ||
- (cmp == 0 &&
- !(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
-}
-
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b)
{
@@ -679,8 +675,7 @@ static inline void __btree_iter_init(struct btree_iter *iter,
struct btree_iter_level *l = &iter->l[b->level];
bch2_btree_node_iter_init(&l->iter, b, iter->pos,
- iter->flags & BTREE_ITER_IS_EXTENTS,
- btree_node_is_extents(b));
+ iter->flags & BTREE_ITER_IS_EXTENTS);
/* Skip to first non whiteout: */
if (b->level)
@@ -1022,7 +1017,9 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
}
iter->uptodate = BTREE_ITER_NEED_PEEK;
+
bch2_btree_iter_verify_locks(iter);
+ __bch2_btree_iter_verify(iter, iter->l[iter->level].b);
return 0;
}
@@ -1363,9 +1360,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
}
static inline struct bkey_s_c
-__bch2_btree_iter_peek_slot(struct btree_iter *iter)
+__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
+ struct btree_node_iter node_iter;
struct bkey_s_c k;
struct bkey n;
int ret;
@@ -1377,6 +1375,17 @@ recheck:
__btree_iter_advance(l);
/*
+ * iterator is now at the correct position for inserting at iter->pos,
+ * but we need to keep iterating until we find the first non whiteout so
+ * we know how big a hole we have, if any:
+ */
+
+ node_iter = l->iter;
+ if (k.k && bkey_whiteout(k.k))
+ k = __btree_iter_unpack(iter, l, &iter->k,
+ bch2_btree_node_iter_peek(&node_iter, l->b));
+
+ /*
* If we got to the end of the node, check if we need to traverse to the
* next node:
*/
@@ -1392,6 +1401,13 @@ recheck:
if (k.k &&
!bkey_whiteout(k.k) &&
bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
+ /*
+ * if we skipped forward to find the first non whiteout and
+ * there _wasn't_ actually a hole, we want the iterator to be
+ * pointed at the key we found:
+ */
+ l->iter = node_iter;
+
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE;
@@ -1399,41 +1415,88 @@ recheck:
}
/* hole */
+
+ /* holes can't span inode numbers: */
+ if (iter->pos.offset == KEY_OFFSET_MAX) {
+ if (iter->pos.inode == KEY_INODE_MAX)
+ return bkey_s_c_null;
+
+ iter->pos = bkey_successor(iter->pos);
+ goto recheck;
+ }
+
+ if (!k.k)
+ k.k = &l->b->key.k;
+
bkey_init(&n);
n.p = iter->pos;
+ bch2_key_resize(&n,
+ min_t(u64, KEY_SIZE_MAX,
+ (k.k->p.inode == n.p.inode
+ ? bkey_start_offset(k.k)
+ : KEY_OFFSET_MAX) -
+ n.p.offset));
+
+ //EBUG_ON(!n.size);
+ if (!n.size) {
+ char buf[100];
+ bch2_dump_btree_node(iter->l[0].b);
+
+ bch2_bkey_to_text(buf, sizeof(buf), k.k);
+ panic("iter at %llu:%llu\n"
+ "next key %s\n",
+ iter->pos.inode,
+ iter->pos.offset,
+ buf);
+ }
- if (iter->flags & BTREE_ITER_IS_EXTENTS) {
- if (n.p.offset == KEY_OFFSET_MAX) {
- if (n.p.inode == KEY_INODE_MAX)
- return bkey_s_c_null;
-
- iter->pos = bkey_successor(iter->pos);
- goto recheck;
- }
+ iter->k = n;
+ iter->uptodate = BTREE_ITER_UPTODATE;
+ return (struct bkey_s_c) { &iter->k, NULL };
+}
- if (k.k && bkey_whiteout(k.k)) {
- struct btree_node_iter node_iter = l->iter;
+static inline struct bkey_s_c
+__bch2_btree_iter_peek_slot(struct btree_iter *iter)
+{
+ struct btree_iter_level *l = &iter->l[0];
+ struct bkey_s_c k;
+ int ret;
- k = __btree_iter_unpack(iter, l, &iter->k,
- bch2_btree_node_iter_peek(&node_iter, l->b));
- }
+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
+ return __bch2_btree_iter_peek_slot_extents(iter);
- if (!k.k)
- k.k = &l->b->key.k;
+recheck:
+ while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
+ bkey_deleted(k.k) &&
+ bkey_cmp(k.k->p, iter->pos) == 0)
+ __btree_iter_advance(l);
- bch2_key_resize(&n,
- min_t(u64, KEY_SIZE_MAX,
- (k.k->p.inode == n.p.inode
- ? bkey_start_offset(k.k)
- : KEY_OFFSET_MAX) -
- n.p.offset));
+ /*
+ * If we got to the end of the node, check if we need to traverse to the
+ * next node:
+ */
+ if (unlikely(!k.k && btree_iter_pos_after_node(iter, l->b))) {
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+ ret = bch2_btree_iter_traverse(iter);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
- EBUG_ON(!n.size);
+ goto recheck;
}
- iter->k = n;
- iter->uptodate = BTREE_ITER_UPTODATE;
- return (struct bkey_s_c) { &iter->k, NULL };
+ if (k.k &&
+ !bkey_deleted(k.k) &&
+ !bkey_cmp(iter->pos, k.k->p)) {
+ iter->uptodate = BTREE_ITER_UPTODATE;
+ return k;
+ } else {
+ /* hole */
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos;
+
+ iter->uptodate = BTREE_ITER_UPTODATE;
+ return (struct bkey_s_c) { &iter->k, NULL };
+ }
}
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 5376388e91e6..d57ca3d08c16 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -176,8 +176,6 @@ struct btree_cache {
};
struct btree_node_iter {
- u8 is_extents;
-
struct btree_node_iter_set {
u16 k, end;
} data[MAX_BSETS];
@@ -459,9 +457,6 @@ struct btree_root {
* we're holding the write lock and we know what key is about to be overwritten:
*/
-struct btree_iter;
-struct btree_node_iter;
-
enum btree_insert_ret {
BTREE_INSERT_OK,
/* extent spanned multiple leaf nodes: have to traverse to next node: */
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index b60eb3d33c7b..1fe6f1e3e843 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -35,7 +35,7 @@ static void btree_node_interior_verify(struct btree *b)
BUG_ON(!b->level);
- bch2_btree_node_iter_init(&iter, b, b->key.k.p, false, false);
+ bch2_btree_node_iter_init(&iter, b, b->key.k.p, false);
#if 1
BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) ||
bkey_cmp_left_packed(b, k, &b->key.k.p));
@@ -1322,7 +1322,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
- bch2_btree_node_iter_init(&node_iter, b, k->k.p, false, false);
+ bch2_btree_node_iter_init(&node_iter, b, k->k.p, false);
while (!bch2_keylist_empty(keys)) {
k = bch2_keylist_front(keys);
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index ce0223bd52b5..0ef519e8feed 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -64,7 +64,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter, t,
- k, clobber_u64s, 0);
+ k, clobber_u64s, 0);
+ bch2_btree_iter_verify(iter, b);
return true;
}
@@ -73,7 +74,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
k->type = KEY_TYPE_DELETED;
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
- k->u64s, k->u64s);
+ k->u64s, k->u64s);
+ bch2_btree_iter_verify(iter, b);
if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k);
@@ -98,7 +100,8 @@ overwrite:
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
- clobber_u64s, k->u64s);
+ clobber_u64s, k->u64s);
+ bch2_btree_iter_verify(iter, b);
return true;
}
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 803272b10e61..df04af882c16 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -858,30 +858,34 @@ void bch2_key_resize(struct bkey *k,
* that we have to unpack the key, modify the unpacked key - then this
* copies/repacks the unpacked to the original as necessary.
*/
-static bool __extent_save(struct btree *b, struct btree_node_iter *iter,
- struct bkey_packed *dst, struct bkey *src)
+static void extent_save(struct btree *b, struct bkey_packed *dst,
+ struct bkey *src)
{
struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
- bool ret;
- if ((dst_unpacked = packed_to_bkey(dst))) {
+ if ((dst_unpacked = packed_to_bkey(dst)))
dst_unpacked->k = *src;
- ret = true;
- } else {
- ret = bch2_bkey_pack_key(dst, src, f);
- }
-
- if (ret && iter)
- bch2_verify_key_order(b, iter, dst);
-
- return ret;
+ else
+ BUG_ON(!bch2_bkey_pack_key(dst, src, f));
}
-static void extent_save(struct btree *b, struct btree_node_iter *iter,
- struct bkey_packed *dst, struct bkey *src)
+static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
+ struct bkey_i *src)
{
- BUG_ON(!__extent_save(b, iter, dst, src));
+ struct bkey_format *f = &b->format;
+ struct bkey_i *dst_unpacked;
+ struct bkey_packed tmp;
+
+ if ((dst_unpacked = packed_to_bkey(dst)))
+ dst_unpacked->k = src->k;
+ else if (bch2_bkey_pack_key(&tmp, &src->k, f))
+ memcpy_u64s(dst, &tmp, f->key_u64s);
+ else
+ return false;
+
+ memcpy_u64s(bkeyp_val(f, dst), &src->v, bkey_val_u64s(&src->k));
+ return true;
}
/*
@@ -1010,7 +1014,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
sort_key_next(iter, b, _r);
} else {
__bch2_cut_front(l.k->p, r);
- extent_save(b, NULL, rk, r.k);
+ extent_save(b, rk, r.k);
}
extent_sort_sift(iter, b, _r - iter->data);
@@ -1024,7 +1028,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
__bch2_cut_front(r.k->p, l);
- extent_save(b, NULL, lk, l.k);
+ extent_save(b, lk, l.k);
extent_sort_sift(iter, b, 0);
@@ -1032,7 +1036,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
bkey_to_packed(&tmp.k));
} else {
bch2_cut_back(bkey_start_pos(r.k), l.k);
- extent_save(b, NULL, lk, l.k);
+ extent_save(b, lk, l.k);
}
}
@@ -1135,6 +1139,55 @@ extent_insert_should_stop(struct extent_insert_state *s)
return BTREE_INSERT_OK;
}
+static void verify_extent_nonoverlapping(struct btree *b,
+ struct btree_node_iter *_iter,
+ struct bkey_i *insert)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct btree_node_iter iter;
+ struct bkey_packed *k;
+ struct bkey uk;
+
+ iter = *_iter;
+ k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD);
+ BUG_ON(k &&
+ (uk = bkey_unpack_key(b, k),
+ bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
+
+ iter = *_iter;
+ k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD);
+#if 0
+ BUG_ON(k &&
+ (uk = bkey_unpack_key(b, k),
+ bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
+#else
+ if (k &&
+ (uk = bkey_unpack_key(b, k),
+ bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
+ char buf1[100];
+ char buf2[100];
+
+ bch2_bkey_to_text(buf1, sizeof(buf1), &insert->k);
+ bch2_bkey_to_text(buf2, sizeof(buf2), &uk);
+
+ bch2_dump_btree_node(b);
+ panic("insert > next :\n"
+ "insert %s\n"
+ "next %s\n",
+ buf1, buf2);
+ }
+#endif
+
+#endif
+}
+
+static void verify_modified_extent(struct btree_iter *iter,
+ struct bkey_packed *k)
+{
+ bch2_btree_iter_verify(iter, iter->l[0].b);
+ bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
+}
+
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert)
{
@@ -1148,6 +1201,14 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
unsigned clobber_u64s;
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+ verify_extent_nonoverlapping(l->b, &l->iter, insert);
+
+ if (!prev) {
+ while ((prev = bch2_bkey_prev_all(l->b, t, where)) &&
+ (bkey_cmp_left_packed(l->b, prev, &insert->k.p) ?:
+ ((int) bkey_deleted(&insert->k) - (int) bkey_deleted(prev))) > 0)
+ where = prev;
+ }
if (prev)
where = bkey_next(prev);
@@ -1173,12 +1234,15 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_insert(l->b, &l->iter, where, insert, clobber_u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where,
- clobber_u64s, where->u64s);
+ clobber_u64s, where->u64s);
+ bch2_verify_key_order(l->b, &l->iter, where);
+ bch2_btree_iter_verify(iter, l->b);
return;
drop_deleted_keys:
bch2_bset_delete(l->b, where, clobber_u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
where, clobber_u64s, 0);
+ bch2_btree_iter_verify(iter, l->b);
}
static void extent_insert_committed(struct extent_insert_state *s)
@@ -1226,8 +1290,10 @@ static void extent_insert_committed(struct extent_insert_state *s)
bch2_btree_journal_key(s->trans, iter, &split.k);
- if (!s->deleting)
+ if (!s->deleting) {
+ bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
extent_bset_insert(c, iter, &split.k);
+ }
done:
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
@@ -1345,22 +1411,21 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
- struct btree_node_iter *node_iter = &l->iter;
- enum btree_insert_ret ret;
switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */
bch2_cut_subtract_front(s, insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
- extent_save(b, node_iter, _k, k.k);
+ extent_save(b, _k, k.k);
+ bch2_verify_key_order(b, &l->iter, _k);
break;
case BCH_EXTENT_OVERLAP_BACK:
/* insert overlaps with end of k: */
bch2_cut_subtract_back(s, bkey_start_pos(&insert->k), k);
BUG_ON(bkey_deleted(k.k));
- extent_save(b, node_iter, _k, k.k);
+ extent_save(b, _k, k.k);
/*
* As the auxiliary tree is indexed by the end of the
@@ -1368,46 +1433,31 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
* auxiliary tree.
*/
bch2_bset_fix_invalidated_key(b, t, _k);
- bch2_btree_node_iter_fix(iter, b, node_iter, t,
- _k, _k->u64s, _k->u64s);
+ bch2_btree_node_iter_fix(iter, b, &l->iter, t,
+ _k, _k->u64s, _k->u64s);
+ bch2_verify_key_order(b, &l->iter, _k);
break;
case BCH_EXTENT_OVERLAP_ALL: {
- struct bpos orig_pos = k.k->p;
-
/* The insert key completely covers k, invalidate k */
if (!bkey_whiteout(k.k))
btree_keys_account_key_drop(&b->nr,
t - b->set, _k);
bch2_drop_subtract(s, k);
- k.k->p = bkey_start_pos(&insert->k);
- if (!__extent_save(b, node_iter, _k, k.k)) {
- /*
- * Couldn't repack: we aren't necessarily able
- * to repack if the new key is outside the range
- * of the old extent, so we have to split
- * @insert:
- */
- k.k->p = orig_pos;
- extent_save(b, node_iter, _k, k.k);
- ret = extent_insert_advance_pos(s, k.s_c);
- if (ret != BTREE_INSERT_OK)
- return ret;
+ if (t == bset_tree_last(l->b)) {
+ unsigned u64s = _k->u64s;
- extent_insert_committed(s);
- /*
- * We split and inserted upto at k.k->p - that
- * has to coincide with iter->pos, so that we
- * don't have anything more we have to insert
- * until we recheck our journal reservation:
- */
- EBUG_ON(bkey_cmp(s->committed, k.k->p));
+ bch2_bset_delete(l->b, _k, _k->u64s);
+ bch2_btree_node_iter_fix(iter, b, &l->iter, t,
+ _k, u64s, 0);
+ bch2_btree_iter_verify(iter, b);
} else {
- bch2_bset_fix_invalidated_key(b, t, _k);
- bch2_btree_node_iter_fix(iter, b, node_iter, t,
- _k, _k->u64s, _k->u64s);
+ extent_save(b, _k, k.k);
+ bch2_btree_node_iter_fix(iter, b, &l->iter, t,
+ _k, _k->u64s, _k->u64s);
+ bch2_verify_key_order(b, &l->iter, _k);
}
break;
@@ -1436,7 +1486,8 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
bch2_cut_subtract_front(s, insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
- extent_save(b, node_iter, _k, k.k);
+ extent_save(b, _k, k.k);
+ bch2_verify_key_order(b, &l->iter, _k);
bch2_add_sectors(s, bkey_i_to_s_c(&split.k),
bkey_start_offset(&split.k.k),
@@ -1450,26 +1501,20 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
}
static enum btree_insert_ret
-__bch2_delete_fixup_extent(struct extent_insert_state *s)
+__bch2_insert_fixup_extent(struct extent_insert_state *s)
{
struct bch_fs *c = s->trans->c;
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
- struct btree_node_iter *node_iter = &l->iter;
struct bkey_packed *_k;
struct bkey unpacked;
struct bkey_i *insert = s->insert->k;
enum btree_insert_ret ret = BTREE_INSERT_OK;
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
-
- s->whiteout = *insert;
- s->whiteout.k.type = KEY_TYPE_DISCARD;
-
while (bkey_cmp(s->committed, insert->k.p) < 0 &&
(ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
- (_k = bch2_btree_node_iter_peek_all(node_iter, b))) {
+ (_k = bch2_btree_node_iter_peek_filter(&l->iter, b, KEY_TYPE_DISCARD))) {
struct bset_tree *t = bch2_bkey_to_bset(b, _k);
struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
enum bch_extent_overlap overlap;
@@ -1480,112 +1525,92 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break;
- if (bkey_whiteout(k.k)) {
- s->committed = bpos_min(insert->k.p, k.k->p);
- goto next;
- }
-
- overlap = bch2_extent_overlap(&insert->k, k.k);
-
ret = extent_insert_advance_pos(s, k.s_c);
if (ret)
break;
- s->do_journal = true;
+ overlap = bch2_extent_overlap(&insert->k, k.k);
- if (overlap == BCH_EXTENT_OVERLAP_ALL) {
- btree_keys_account_key_drop(&b->nr,
- t - b->set, _k);
- bch2_subtract_sectors(s, k.s_c,
- bkey_start_offset(k.k), k.k->size);
- _k->type = KEY_TYPE_DISCARD;
- reserve_whiteout(b, _k);
- } else if (k.k->needs_whiteout ||
- bkey_written(b, _k)) {
- struct bkey_i discard = *insert;
-
- discard.k.type = KEY_TYPE_DISCARD;
-
- switch (overlap) {
- case BCH_EXTENT_OVERLAP_FRONT:
- bch2_cut_front(bkey_start_pos(k.k), &discard);
- break;
- case BCH_EXTENT_OVERLAP_BACK:
- bch2_cut_back(k.k->p, &discard.k);
- break;
- default:
- break;
- }
+ if (!s->deleting) {
+ if (k.k->needs_whiteout || bkey_written(b, _k))
+ insert->k.needs_whiteout = true;
- discard.k.needs_whiteout = true;
+ if (overlap == BCH_EXTENT_OVERLAP_ALL &&
+ bkey_whiteout(k.k) &&
+ k.k->needs_whiteout) {
+ unreserve_whiteout(b, _k);
+ _k->needs_whiteout = false;
+ }
ret = extent_squash(s, insert, t, _k, k, overlap);
- BUG_ON(ret != BTREE_INSERT_OK);
-
- extent_bset_insert(c, iter, &discard);
} else {
- ret = extent_squash(s, insert, t, _k, k, overlap);
- BUG_ON(ret != BTREE_INSERT_OK);
- }
-next:
- bch2_cut_front(s->committed, insert);
- bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
- }
-
- return ret;
-}
-
-static enum btree_insert_ret
-__bch2_insert_fixup_extent(struct extent_insert_state *s)
-{
- struct btree_iter *iter = s->insert->iter;
- struct btree_iter_level *l = &iter->l[0];
- struct btree *b = l->b;
- struct btree_node_iter *node_iter = &l->iter;
- struct bkey_packed *_k;
- struct bkey unpacked;
- struct bkey_i *insert = s->insert->k;
- enum btree_insert_ret ret = BTREE_INSERT_OK;
-
- while (bkey_cmp(s->committed, insert->k.p) < 0 &&
- (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
- (_k = bch2_btree_node_iter_peek_all(node_iter, b))) {
- struct bset_tree *t = bch2_bkey_to_bset(b, _k);
- struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
- enum bch_extent_overlap overlap;
-
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
- EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
+ if (bkey_whiteout(k.k))
+ goto next;
+
+ s->do_journal = true;
+
+ if (overlap == BCH_EXTENT_OVERLAP_ALL) {
+ btree_keys_account_key_drop(&b->nr,
+ t - b->set, _k);
+ bch2_subtract_sectors(s, k.s_c,
+ bkey_start_offset(k.k), k.k->size);
+ _k->type = KEY_TYPE_DISCARD;
+ reserve_whiteout(b, _k);
+ } else if (k.k->needs_whiteout ||
+ bkey_written(b, _k)) {
+ struct bkey_i discard = *insert;
+
+ discard.k.type = KEY_TYPE_DISCARD;
+
+ switch (overlap) {
+ case BCH_EXTENT_OVERLAP_FRONT:
+ bch2_cut_front(bkey_start_pos(k.k), &discard);
+ break;
+ case BCH_EXTENT_OVERLAP_BACK:
+ bch2_cut_back(k.k->p, &discard.k);
+ break;
+ default:
+ break;
+ }
- if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
- break;
+ discard.k.needs_whiteout = true;
- overlap = bch2_extent_overlap(&insert->k, k.k);
+ ret = extent_squash(s, insert, t, _k, k, overlap);
+ BUG_ON(ret != BTREE_INSERT_OK);
- if (!k.k->size)
- goto squash;
+ extent_bset_insert(c, iter, &discard);
+ } else {
+ ret = extent_squash(s, insert, t, _k, k, overlap);
+ BUG_ON(ret != BTREE_INSERT_OK);
+ }
+next:
+ bch2_cut_front(s->committed, insert);
+ bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
+ }
- /*
- * Only call advance pos & call hook for nonzero size extents:
- */
- ret = extent_insert_advance_pos(s, k.s_c);
- if (ret)
+ if (ret != BTREE_INSERT_OK ||
+ overlap == BCH_EXTENT_OVERLAP_FRONT ||
+ overlap == BCH_EXTENT_OVERLAP_MIDDLE)
break;
+ }
- if (k.k->size &&
- (k.k->needs_whiteout || bkey_written(b, _k)))
- insert->k.needs_whiteout = true;
+ if (ret == BTREE_INSERT_OK &&
+ bkey_cmp(s->committed, insert->k.p) < 0)
+ ret = extent_insert_advance_pos(s, bkey_s_c_null);
- if (overlap == BCH_EXTENT_OVERLAP_ALL &&
- bkey_whiteout(k.k) &&
- k.k->needs_whiteout) {
- unreserve_whiteout(b, _k);
- _k->needs_whiteout = false;
- }
-squash:
- ret = extent_squash(s, insert, t, _k, k, overlap);
- if (ret != BTREE_INSERT_OK)
- break;
+ /*
+ * We may have skipped past some deleted extents greater than the insert
+ * key before we got to a non deleted extent and knew we could bail out;
+ * rewind the iterator a bit if necessary:
+ */
+ {
+ struct btree_node_iter node_iter = l->iter;
+ struct bkey uk;
+
+ while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
+ (uk = bkey_unpack_key(l->b, _k),
+ bkey_cmp(uk.p, s->committed) > 0))
+ l->iter = node_iter;
}
return ret;
@@ -1647,6 +1672,11 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
.deleting = bkey_whiteout(&insert->k->k),
};
+ if (s.deleting) {
+ s.whiteout = *insert->k;
+ s.whiteout.k.type = KEY_TYPE_DISCARD;
+ }
+
EBUG_ON(iter->level);
EBUG_ON(!insert->k->k.size);
@@ -1657,6 +1687,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
* @insert->k and the node iterator that we're advancing:
*/
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
+ bch2_btree_iter_verify(iter, b);
if (!s.deleting &&
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
@@ -1664,13 +1695,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
bkey_start_offset(&insert->k->k),
insert->k->k.size);
- ret = !s.deleting
- ? __bch2_insert_fixup_extent(&s)
- : __bch2_delete_fixup_extent(&s);
-
- if (ret == BTREE_INSERT_OK &&
- bkey_cmp(s.committed, insert->k->k.p) < 0)
- ret = extent_insert_advance_pos(&s, bkey_s_c_null);
+ ret = __bch2_insert_fixup_extent(&s);
extent_insert_committed(&s);
@@ -2172,130 +2197,6 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
return BCH_MERGE_MERGE;
}
-static void extent_i_save(struct btree *b, struct bkey_packed *dst,
- struct bkey_i *src)
-{
- struct bkey_format *f = &b->format;
- struct bkey_i *dst_unpacked;
-
- BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
-
- /*
- * We don't want the bch2_verify_key_order() call in extent_save(),
- * because we may be out of order with deleted keys that are about to be
- * removed by extent_bset_insert()
- */
-
- if ((dst_unpacked = packed_to_bkey(dst)))
- bkey_copy(dst_unpacked, src);
- else
- BUG_ON(!bch2_bkey_pack(dst, src, f));
-}
-
-static bool extent_merge_one_overlapping(struct btree_iter *iter,
- struct bpos new_pos,
- struct bset_tree *t,
- struct bkey_packed *k, struct bkey uk,
- bool check, bool could_pack)
-{
- struct btree_iter_level *l = &iter->l[0];
-
- BUG_ON(!bkey_deleted(k));
-
- if (check) {
- return !bkey_packed(k) || could_pack;
- } else {
- uk.p = new_pos;
- extent_save(l->b, &l->iter, k, &uk);
- bch2_bset_fix_invalidated_key(l->b, t, k);
- bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
- k, k->u64s, k->u64s);
- return true;
- }
-}
-
-static bool extent_merge_do_overlapping(struct btree_iter *iter,
- struct bkey *m, bool back_merge)
-{
- struct btree_iter_level *l = &iter->l[0];
- struct btree *b = l->b;
- struct btree_node_iter *node_iter = &l->iter;
- struct bset_tree *t;
- struct bkey_packed *k;
- struct bkey uk;
- struct bpos new_pos = back_merge ? m->p : bkey_start_pos(m);
- bool could_pack = bkey_pack_pos((void *) &uk, new_pos, b);
- bool check = true;
-
- /*
- * @m is the new merged extent:
- *
- * The merge took place in the last bset; we know there can't be any 0
- * size extents overlapping with m there because if so they would have
- * been between the two extents we merged.
- *
- * But in the other bsets, we have to check for and fix such extents:
- */
-do_fixup:
- for_each_bset(b, t) {
- if (t == bset_tree_last(b))
- break;
-
- /*
- * if we don't find this bset in the iterator we already got to
- * the end of that bset, so start searching from the end.
- */
- k = bch2_btree_node_iter_bset_pos(node_iter, b, t);
-
- if (k == btree_bkey_last(b, t))
- k = bch2_bkey_prev_all(b, t, k);
- if (!k)
- continue;
-
- if (back_merge) {
- /*
- * Back merge: 0 size extents will be before the key
- * that was just inserted (and thus the iterator
- * position) - walk backwards to find them
- */
- for (;
- k &&
- (uk = bkey_unpack_key(b, k),
- bkey_cmp(uk.p, bkey_start_pos(m)) > 0);
- k = bch2_bkey_prev_all(b, t, k)) {
- if (bkey_cmp(uk.p, m->p) >= 0)
- continue;
-
- if (!extent_merge_one_overlapping(iter, new_pos,
- t, k, uk, check, could_pack))
- return false;
- }
- } else {
- /* Front merge - walk forwards */
- for (;
- k != btree_bkey_last(b, t) &&
- (uk = bkey_unpack_key(b, k),
- bkey_cmp(uk.p, m->p) < 0);
- k = bkey_next(k)) {
- if (bkey_cmp(uk.p,
- bkey_start_pos(m)) <= 0)
- continue;
-
- if (!extent_merge_one_overlapping(iter, new_pos,
- t, k, uk, check, could_pack))
- return false;
- }
- }
- }
-
- if (check) {
- check = false;
- goto do_fixup;
- }
-
- return true;
-}
-
/*
* When merging an extent that we're inserting into a btree node, the new merged
* extent could overlap with an existing 0 size extent - if we don't fix that,
@@ -2312,13 +2213,13 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
{
struct btree *b = iter->l[0].b;
struct btree_node_iter *node_iter = &iter->l[0].iter;
- const struct bkey_format *f = &b->format;
- struct bset_tree *t = bset_tree_last(b);
- struct bkey_packed *m;
- BKEY_PADDED(k) li;
- BKEY_PADDED(k) ri;
- struct bkey_i *mi;
- struct bkey tmp;
+ BKEY_PADDED(k) li, ri;
+ struct bkey_packed *m = back_merge ? l : r;
+ struct bkey_i *mi = back_merge ? &li.k : &ri.k;
+ struct bset_tree *t = bch2_bkey_to_bset(b, m);
+ enum merge_result ret;
+
+ EBUG_ON(bkey_written(b, m));
/*
* We need to save copies of both l and r, because we might get a
@@ -2327,57 +2228,49 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bch2_bkey_unpack(b, &li.k, l);
bch2_bkey_unpack(b, &ri.k, r);
- m = back_merge ? l : r;
- mi = back_merge ? &li.k : &ri.k;
+ ret = bch2_extent_merge(c, b, &li.k, &ri.k);
+ if (ret == BCH_MERGE_NOMERGE)
+ return false;
- /* l & r should be in last bset: */
- EBUG_ON(bch2_bkey_to_bset(b, m) != t);
+ /*
+ * check if we overlap with deleted extents - would break the sort
+ * order:
+ */
+ if (back_merge) {
+ struct bkey_packed *n = bkey_next(m);
- switch (bch2_extent_merge(c, b, &li.k, &ri.k)) {
- case BCH_MERGE_NOMERGE:
- return false;
- case BCH_MERGE_PARTIAL:
- if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &mi->k, f))
+ if (n != btree_bkey_last(b, t) &&
+ bkey_cmp_left_packed(b, n, &li.k.k.p) <= 0 &&
+ bkey_deleted(n))
return false;
+ } else if (ret == BCH_MERGE_MERGE) {
+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
- if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
+ if (prev &&
+ bkey_cmp_left_packed_byval(b, prev,
+ bkey_start_pos(&li.k.k)) > 0)
return false;
+ }
- extent_i_save(b, m, mi);
- bch2_bset_fix_invalidated_key(b, t, m);
-
- /*
- * Update iterator to reflect what we just inserted - otherwise,
- * the iter_fix() call is going to put us _before_ the key we
- * just partially merged with:
- */
- if (back_merge)
- bch2_btree_iter_set_pos_same_leaf(iter, li.k.k.p);
-
- bch2_btree_node_iter_fix(iter, b, node_iter,
- t, m, m->u64s, m->u64s);
+ if (ret == BCH_MERGE_PARTIAL) {
+ if (!extent_i_save(b, m, mi))
+ return false;
if (!back_merge)
bkey_copy(packed_to_bkey(l), &li.k);
else
bkey_copy(packed_to_bkey(r), &ri.k);
- return false;
- case BCH_MERGE_MERGE:
- if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &li.k.k, f))
- return false;
-
- if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
+ } else {
+ if (!extent_i_save(b, m, &li.k))
return false;
+ }
- extent_i_save(b, m, &li.k);
- bch2_bset_fix_invalidated_key(b, t, m);
+ bch2_bset_fix_invalidated_key(b, t, m);
+ bch2_btree_node_iter_fix(iter, b, node_iter,
+ t, m, m->u64s, m->u64s);
+ verify_modified_extent(iter, m);
- bch2_btree_node_iter_fix(iter, b, node_iter,
- t, m, m->u64s, m->u64s);
- return true;
- default:
- BUG();
- }
+ return ret == BCH_MERGE_MERGE;
}
int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)