aboutsummaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/btree_cache.c')
-rw-r--r--fs/bcachefs/btree_cache.c170
1 files changed, 107 insertions, 63 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 5e5858191905..02c70e813fac 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bbpos.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
@@ -9,6 +10,7 @@
#include "debug.h"
#include "errcode.h"
#include "error.h"
+#include "journal.h"
#include "trace.h"
#include <linux/prefetch.h>
@@ -59,7 +61,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
clear_btree_node_just_written(b);
- kvpfree(b->data, btree_bytes(c));
+ kvfree(b->data);
b->data = NULL;
#ifdef __KERNEL__
kvfree(b->aux_data);
@@ -93,7 +95,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
BUG_ON(b->data || b->aux_data);
- b->data = kvpmalloc(btree_bytes(c), gfp);
+ b->data = kvmalloc(btree_buf_bytes(b), gfp);
if (!b->data)
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
#ifdef __KERNEL__
@@ -106,7 +108,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
b->aux_data = NULL;
#endif
if (!b->aux_data) {
- kvpfree(b->data, btree_bytes(c));
+ kvfree(b->data);
b->data = NULL;
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
}
@@ -125,7 +127,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
bkey_btree_ptr_init(&b->key);
INIT_LIST_HEAD(&b->list);
INIT_LIST_HEAD(&b->write_blocked);
- b->byte_order = ilog2(btree_bytes(c));
+ b->byte_order = ilog2(c->opts.btree_node_size);
return b;
}
@@ -207,6 +209,18 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
int ret = 0;
lockdep_assert_held(&bc->lock);
+
+ struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
+
+ u64 mask = b->c.level
+ ? bc->pinned_nodes_interior_mask
+ : bc->pinned_nodes_leaf_mask;
+
+ if ((mask & BIT_ULL(b->c.btree_id)) &&
+ bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
+ bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+
wait_on_io:
if (b->flags & ((1U << BTREE_NODE_dirty)|
(1U << BTREE_NODE_read_in_flight)|
@@ -407,7 +421,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
if (c->verify_data)
list_move(&c->verify_data->list, &bc->live);
- kvpfree(c->verify_ondisk, btree_bytes(c));
+ kvfree(c->verify_ondisk);
for (i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
@@ -424,14 +438,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
BUG_ON(btree_node_read_in_flight(b) ||
btree_node_write_in_flight(b));
- if (btree_node_dirty(b))
- bch2_btree_complete_write(c, b, btree_current_write(b));
- clear_btree_node_dirty_acct(c, b);
-
btree_node_data_free(c, b);
}
- BUG_ON(atomic_read(&c->btree_cache.dirty));
+ BUG_ON(!bch2_journal_error(&c->journal) &&
+ atomic_read(&c->btree_cache.dirty));
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
@@ -472,7 +483,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
mutex_init(&c->verify_lock);
- shrink = shrinker_alloc(0, "%s/btree_cache", c->name);
+ shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
if (!shrink)
goto err;
bc->shrink = shrink;
@@ -502,19 +513,21 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
* cannibalize_bucket() will take. This means every time we unlock the root of
* the btree, we need to release this lock if we have it held.
*/
-void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
+void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
if (bc->alloc_lock == current) {
- trace_and_count(c, btree_cache_cannibalize_unlock, c);
+ trace_and_count(c, btree_cache_cannibalize_unlock, trans);
bc->alloc_lock = NULL;
closure_wake_up(&bc->alloc_wait);
}
}
-int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
+int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct task_struct *old;
@@ -523,7 +536,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
goto success;
if (!cl) {
- trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
+ trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
}
@@ -537,11 +550,11 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
goto success;
}
- trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
+ trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
success:
- trace_and_count(c, btree_cache_cannibalize_lock, c);
+ trace_and_count(c, btree_cache_cannibalize_lock, trans);
return 0;
}
@@ -675,7 +688,7 @@ err:
mutex_unlock(&bc->lock);
- trace_and_count(c, btree_cache_cannibalize, c);
+ trace_and_count(c, btree_cache_cannibalize, trans);
goto out;
}
@@ -696,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
- u32 seq;
- BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+ if (unlikely(level >= BTREE_MAX_DEPTH)) {
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
+ level, BTREE_MAX_DEPTH);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(!bkey_is_btree_ptr(&k->k))) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
/*
* Parent node must be locked, else we could read in a btree node that's
* been freed:
@@ -711,6 +746,9 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
b = bch2_btree_node_mem_alloc(trans, level != 0);
if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
+ if (!path)
+ return b;
+
trans->memory_allocation_failure = true;
trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
@@ -719,12 +757,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
if (IS_ERR(b))
return b;
- /*
- * Btree nodes read in from disk should not have the accessed bit set
- * initially, so that linear scans don't thrash the cache:
- */
- clear_btree_node_accessed(b);
-
bkey_copy(&b->key, k);
if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */
@@ -742,33 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
}
set_btree_node_read_in_flight(b);
-
six_unlock_write(&b->c.lock);
- seq = six_lock_seq(&b->c.lock);
- six_unlock_intent(&b->c.lock);
- /* Unlock before doing IO: */
- if (path && sync)
- bch2_trans_unlock_noassert(trans);
+ if (path) {
+ u32 seq = six_lock_seq(&b->c.lock);
- bch2_btree_node_read(c, b, sync);
+ /* Unlock before doing IO: */
+ six_unlock_intent(&b->c.lock);
+ bch2_trans_unlock_noassert(trans);
- if (!sync)
- return NULL;
+ bch2_btree_node_read(trans, b, sync);
- if (path) {
- int ret = bch2_trans_relock(trans) ?:
- bch2_btree_path_relock_intent(trans, path);
- if (ret) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(ret);
- }
- }
+ if (!sync)
+ return NULL;
- if (!six_relock_type(&b->c.lock, lock_type, seq)) {
- if (path)
- trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+ if (!six_relock_type(&b->c.lock, lock_type, seq))
+ b = NULL;
+ } else {
+ bch2_btree_node_read(trans, b, sync);
+ if (lock_type == SIX_LOCK_read)
+ six_lock_downgrade(&b->c.lock);
}
return b;
@@ -785,19 +810,20 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
"btree node header doesn't match ptr\n"
"btree %s level %u\n"
"ptr: ",
- bch2_btree_ids[b->c.btree_id], b->c.level);
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
prt_printf(&buf, "\nheader: btree %s level %llu\n"
"min ",
- bch2_btree_ids[BTREE_NODE_ID(b->data)],
+ bch2_btree_id_str(BTREE_NODE_ID(b->data)),
BTREE_NODE_LEVEL(b->data));
bch2_bpos_to_text(&buf, b->data->min_key);
prt_printf(&buf, "\nmax ");
bch2_bpos_to_text(&buf, b->data->max_key);
- bch2_fs_inconsistent(c, "%s", buf.buf);
+ bch2_fs_topology_error(c, "%s", buf.buf);
+
printbuf_exit(&buf);
}
@@ -907,7 +933,7 @@ retry:
if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-BCH_ERR_btree_node_read_error);
}
EBUG_ON(b->c.btree_id != path->btree_id);
@@ -998,7 +1024,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-BCH_ERR_btree_node_read_error);
}
EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1041,7 +1067,7 @@ retry:
goto retry;
if (IS_ERR(b) &&
- !bch2_btree_cache_cannibalize_lock(c, NULL))
+ !bch2_btree_cache_cannibalize_lock(trans, NULL))
goto retry;
if (IS_ERR(b))
@@ -1081,7 +1107,7 @@ lock_node:
if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
- b = ERR_PTR(-EIO);
+ b = ERR_PTR(-BCH_ERR_btree_node_read_error);
goto out;
}
@@ -1089,7 +1115,7 @@ lock_node:
EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
btree_check_header(c, b);
out:
- bch2_btree_cache_cannibalize_unlock(c);
+ bch2_btree_cache_cannibalize_unlock(trans);
return b;
}
@@ -1100,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
- BUG_ON(trans && !btree_node_locked(path, level + 1));
+ BUG_ON(path && !btree_node_locked(path, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
- b = btree_cache_find(bc, k);
+ struct btree *b = btree_cache_find(bc, k);
if (b)
return 0;
b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
- return PTR_ERR_OR_ZERO(b);
+ if (!IS_ERR_OR_NULL(b))
+ six_unlock_read(&b->c.lock);
+ return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
}
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1123,6 +1150,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
b = btree_cache_find(bc, k);
if (!b)
return;
+
+ BUG_ON(b == btree_node_root(trans->c, b));
wait_on_io:
/* not allowed to wait on io with btree locks held: */
@@ -1134,6 +1163,8 @@ wait_on_io:
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
+ goto out;
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
@@ -1148,13 +1179,26 @@ wait_on_io:
btree_node_data_free(c, b);
bch2_btree_node_hash_remove(bc, b);
mutex_unlock(&bc->lock);
-
+out:
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}
-void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
- const struct btree *b)
+const char *bch2_btree_id_str(enum btree_id btree)
+{
+ return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
+}
+
+void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
+{
+ prt_printf(out, "%s level %u/%u\n ",
+ bch2_btree_id_str(b->c.btree_id),
+ b->c.level,
+ bch2_btree_id_root(c, b->c.btree_id)->level);
+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+}
+
+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
struct bset_stats stats;
@@ -1185,7 +1229,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
" failed unpacked %zu\n",
b->unpack_fn_len,
b->nr.live_u64s * sizeof(u64),
- btree_bytes(c) - sizeof(struct btree_node),
+ btree_buf_bytes(b) - sizeof(struct btree_node),
b->nr.live_u64s * 100 / btree_max_u64s(c),
b->sib_u64s[0],
b->sib_u64s[1],