about summary refs log tree commit diff
path: root/fs/bcachefs/alloc_background.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/alloc_background.c')
-rw-r--r-- fs/bcachefs/alloc_background.c 237
1 files changed, 142 insertions, 95 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 1de9fac3bcf4..d9c5a92fa708 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -3,6 +3,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
+#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
@@ -14,6 +15,7 @@
#include "buckets_waiting_for_journal.h"
#include "clock.h"
#include "debug.h"
+#include "disk_accounting.h"
#include "ec.h"
#include "error.h"
#include "lru.h"
@@ -267,27 +269,41 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
i == READ ? "read" : "write",
a.v->io_time[i], LRU_TIME_MAX);
+ unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) >
+ offsetof(struct bch_alloc_v4, stripe_sectors)
+ ? a.v->stripe_sectors
+ : 0;
+
switch (a.v->data_type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
- bkey_fsck_err_on(bch2_bucket_sectors_total(*a.v) || a.v->stripe,
+ bkey_fsck_err_on(stripe_sectors ||
+ a.v->dirty_sectors ||
+ a.v->cached_sectors ||
+ a.v->stripe,
c, err, alloc_key_empty_but_have_data,
- "empty data type free but have data");
+ "empty data type free but have data %u.%u.%u %u",
+ stripe_sectors,
+ a.v->dirty_sectors,
+ a.v->cached_sectors,
+ a.v->stripe);
break;
case BCH_DATA_sb:
case BCH_DATA_journal:
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
- bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
+ bkey_fsck_err_on(!a.v->dirty_sectors &&
+ !stripe_sectors,
c, err, alloc_key_dirty_sectors_0,
"data_type %s but dirty_sectors==0",
bch2_data_type_str(a.v->data_type));
break;
case BCH_DATA_cached:
bkey_fsck_err_on(!a.v->cached_sectors ||
- bch2_bucket_sectors_dirty(*a.v) ||
+ a.v->dirty_sectors ||
+ stripe_sectors ||
a.v->stripe,
c, err, alloc_key_cached_inconsistency,
"data type inconsistency");
@@ -318,6 +334,7 @@ void bch2_alloc_v4_swab(struct bkey_s k)
a->stripe = swab32(a->stripe);
a->nr_external_backpointers = swab32(a->nr_external_backpointers);
a->fragmentation_lru = swab64(a->fragmentation_lru);
+ a->stripe_sectors = swab32(a->stripe_sectors);
bps = alloc_v4_backpointers(a);
for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
@@ -342,6 +359,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a));
prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a));
prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors);
+ prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors);
prt_printf(out, "cached_sectors %u\n", a->cached_sectors);
prt_printf(out, "stripe %u\n", a->stripe);
prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy);
@@ -459,7 +477,8 @@ err:
}
__flatten
-struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos)
+struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos,
+ enum btree_iter_update_trigger_flags flags)
{
struct btree_iter iter;
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
@@ -467,7 +486,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
if (ret)
return ERR_PTR(ret);
- ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+ ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
bch2_trans_iter_exit(trans, &iter);
return unlikely(ret) ? ERR_PTR(ret) : a;
}
@@ -578,8 +597,6 @@ int bch2_alloc_read(struct bch_fs *c)
struct bch_dev *ca = NULL;
int ret;
- down_read(&c->gc_lock);
-
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_prefetch, k, ({
@@ -628,7 +645,6 @@ int bch2_alloc_read(struct bch_fs *c)
bch2_dev_put(ca);
bch2_trans_put(trans);
- up_read(&c->gc_lock);
bch_err_fn(c, ret);
return ret;
@@ -743,6 +759,61 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
return ret;
}
+static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, struct bch_dev *ca, /* applies three signed deltas to one (device, data type) accounting key */
+ enum bch_data_type data_type, /* data type whose counters are adjusted */
+ s64 delta_buckets, /* signed change in bucket count */
+ s64 delta_sectors, /* signed change in sector count */
+ s64 delta_fragmented, unsigned flags) /* only the BTREE_TRIGGER_gc bit of flags is consulted */
+{
+ struct disk_accounting_pos acc = { /* accounting key, identified by ca->dev_idx and data_type */
+ .type = BCH_DISK_ACCOUNTING_dev_data_type,
+ .dev_data_type.dev = ca->dev_idx,
+ .dev_data_type.data_type = data_type,
+ };
+ s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented }; /* counter order: buckets, sectors, fragmented */
+
+ return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc); /* result of bch2_disk_accounting_mod() is returned as-is */
+}
+
+int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca, /* turns an alloc key change (old -> new) into per-device data-type counter updates */
+ const struct bch_alloc_v4 *old, /* alloc value before the update */
+ const struct bch_alloc_v4 *new, /* alloc value after the update */
+ unsigned flags) /* forwarded unchanged to bch2_dev_data_type_accounting_mod() */
+{ /* returns 0, or the first nonzero value returned by an accounting update */
+ s64 old_sectors = bch2_bucket_sectors(*old);
+ s64 new_sectors = bch2_bucket_sectors(*new);
+ if (old->data_type != new->data_type) { /* data type changed: add the bucket to the new type, remove it from the old */
+ int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
+ 1, new_sectors, bch2_bucket_sectors_fragmented(ca, *new), flags) ?: /* ?: skips the second update when the first returns nonzero */
+ bch2_dev_data_type_accounting_mod(trans, ca, old->data_type,
+ -1, -old_sectors, -bch2_bucket_sectors_fragmented(ca, *old), flags);
+ if (ret)
+ return ret;
+ } else if (old_sectors != new_sectors) { /* same data type: bucket count stays put, only sector/fragmentation deltas apply */
+ int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
+ 0,
+ new_sectors - old_sectors,
+ bch2_bucket_sectors_fragmented(ca, *new) -
+ bch2_bucket_sectors_fragmented(ca, *old), flags);
+ if (ret)
+ return ret;
+ }
+
+ s64 old_unstriped = bch2_bucket_sectors_unstriped(*old);
+ s64 new_unstriped = bch2_bucket_sectors_unstriped(*new);
+ if (old_unstriped != new_unstriped) { /* unstriped sectors are accounted separately under BCH_DATA_unstriped */
+ int ret = bch2_dev_data_type_accounting_mod(trans, ca, BCH_DATA_unstriped,
+ !!new_unstriped - !!old_unstriped, /* bucket delta: +1 when the count becomes nonzero, -1 when it drops to zero, else 0 */
+ new_unstriped - old_unstriped,
+ 0, /* no fragmentation delta is recorded for this counter */
+ flags);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
int bch2_trigger_alloc(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s new,
@@ -758,10 +829,9 @@ int bch2_trigger_alloc(struct btree_trans *trans,
struct bch_alloc_v4 old_a_convert;
const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
+ struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
if (flags & BTREE_TRIGGER_transactional) {
- struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
-
alloc_data_type_set(new_a, new_a->data_type);
if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
@@ -818,22 +888,21 @@ int bch2_trigger_alloc(struct btree_trans *trans,
goto err;
}
- /*
- * need to know if we're getting called from the invalidate path or
- * not:
- */
-
if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
old_a->cached_sectors) {
- ret = bch2_update_cached_sectors_list(trans, new.k->p.inode,
- -((s64) old_a->cached_sectors));
+ ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx,
+ -((s64) old_a->cached_sectors),
+ flags & BTREE_TRIGGER_gc);
if (ret)
goto err;
}
+
+ ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags);
+ if (ret)
+ goto err;
}
if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
- struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
u64 journal_seq = trans->journal_res.seq;
u64 bucket_journal_seq = new_a->journal_seq;
@@ -862,26 +931,22 @@ int bch2_trigger_alloc(struct btree_trans *trans,
c->journal.flushed_seq_ondisk,
new.k->p.inode, new.k->p.offset,
bucket_journal_seq);
- if (ret) {
- bch2_fs_fatal_error(c,
- "setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
+ if (bch2_fs_fatal_err_on(ret, c,
+ "setting bucket_needs_journal_commit: %s", bch2_err_str(ret)))
goto err;
- }
}
- percpu_down_read(&c->mark_lock);
if (new_a->gen != old_a->gen) {
+ rcu_read_lock();
u8 *gen = bucket_gen(ca, new.k->p.offset);
if (unlikely(!gen)) {
- percpu_up_read(&c->mark_lock);
+ rcu_read_unlock();
goto invalid_bucket;
}
*gen = new_a->gen;
+ rcu_read_unlock();
}
- bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
- percpu_up_read(&c->mark_lock);
-
#define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; })
#define statechange(expr) !eval_state(old_a, expr) && eval_state(new_a, expr)
#define bucket_flushed(a) (!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk)
@@ -904,31 +969,16 @@ int bch2_trigger_alloc(struct btree_trans *trans,
bch2_gc_gens_async(c);
}
- if ((flags & BTREE_TRIGGER_gc) &&
- (flags & BTREE_TRIGGER_bucket_invalidate)) {
- struct bch_alloc_v4 new_a_convert;
- const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);
-
- percpu_down_read(&c->mark_lock);
+ if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) {
+ rcu_read_lock();
struct bucket *g = gc_bucket(ca, new.k->p.offset);
if (unlikely(!g)) {
- percpu_up_read(&c->mark_lock);
+ rcu_read_unlock();
goto invalid_bucket;
}
g->gen_valid = 1;
-
- bucket_lock(g);
-
- g->gen_valid = 1;
- g->gen = new_a->gen;
- g->data_type = new_a->data_type;
- g->stripe = new_a->stripe;
- g->stripe_redundancy = new_a->stripe_redundancy;
- g->dirty_sectors = new_a->dirty_sectors;
- g->cached_sectors = new_a->cached_sectors;
-
- bucket_unlock(g);
- percpu_up_read(&c->mark_lock);
+ g->gen = new_a->gen;
+ rcu_read_unlock();
}
err:
printbuf_exit(&buf);
@@ -1062,7 +1112,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
if (fsck_err_on(!ca,
- c, alloc_key_to_missing_dev_bucket,
+ trans, alloc_key_to_missing_dev_bucket,
"alloc key for invalid device:bucket %llu:%llu",
alloc_k.k->p.inode, alloc_k.k->p.offset))
ret = bch2_btree_delete_at(trans, alloc_iter, 0);
@@ -1082,7 +1132,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
goto err;
if (fsck_err_on(k.k->type != discard_key_type,
- c, need_discard_key_wrong,
+ trans, need_discard_key_wrong,
"incorrect key in need_discard btree (got %s should be %s)\n"
" %s",
bch2_bkey_types[k.k->type],
@@ -1112,7 +1162,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
goto err;
if (fsck_err_on(k.k->type != freespace_key_type,
- c, freespace_key_wrong,
+ trans, freespace_key_wrong,
"incorrect key in freespace btree (got %s should be %s)\n"
" %s",
bch2_bkey_types[k.k->type],
@@ -1143,7 +1193,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
goto err;
if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
- c, bucket_gens_key_wrong,
+ trans, bucket_gens_key_wrong,
"incorrect gen in bucket_gens btree (got %u should be %u)\n"
" %s",
alloc_gen(k, gens_offset), a->gen,
@@ -1184,7 +1234,6 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
struct bpos *end,
struct btree_iter *freespace_iter)
{
- struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1202,7 +1251,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
*end = bkey_min(k.k->p, *end);
if (fsck_err_on(k.k->type != KEY_TYPE_set,
- c, freespace_hole_missing,
+ trans, freespace_hole_missing,
"hole in alloc btree missing in freespace btree\n"
" device %llu buckets %llu-%llu",
freespace_iter->pos.inode,
@@ -1238,7 +1287,6 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
struct bpos *end,
struct btree_iter *bucket_gens_iter)
{
- struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
unsigned i, gens_offset, gens_end_offset;
@@ -1262,7 +1310,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
bkey_reassemble(&g.k_i, k);
for (i = gens_offset; i < gens_end_offset; i++) {
- if (fsck_err_on(g.v.gens[i], c,
+ if (fsck_err_on(g.v.gens[i], trans,
bucket_gens_hole_wrong,
"hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
bucket_gens_pos_to_alloc(k.k->p, i).inode,
@@ -1320,8 +1368,8 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran
if (ret)
return ret;
- if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
- need_discard_freespace_key_to_invalid_dev_bucket,
+ if (fsck_err_on(!bch2_dev_bucket_exists(c, pos),
+ trans, need_discard_freespace_key_to_invalid_dev_bucket,
"entry in %s btree for nonexistant dev:bucket %llu:%llu",
bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset))
goto delete;
@@ -1330,8 +1378,8 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran
if (fsck_err_on(a->data_type != state ||
(state == BCH_DATA_free &&
- genbits != alloc_freespace_genbits(*a)), c,
- need_discard_freespace_key_bad,
+ genbits != alloc_freespace_genbits(*a)),
+ trans, need_discard_freespace_key_bad,
"%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
bch2_btree_id_str(iter->btree_id),
@@ -1378,7 +1426,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
if (!ca) {
- if (fsck_err(c, bucket_gens_to_invalid_dev,
+ if (fsck_err(trans, bucket_gens_to_invalid_dev,
"bucket_gens key for invalid device:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter, 0);
@@ -1386,8 +1434,8 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
}
if (fsck_err_on(end <= ca->mi.first_bucket ||
- start >= ca->mi.nbuckets, c,
- bucket_gens_to_invalid_buckets,
+ start >= ca->mi.nbuckets,
+ trans, bucket_gens_to_invalid_buckets,
"bucket_gens key for invalid buckets:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, iter, 0);
@@ -1395,16 +1443,16 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
}
for (b = start; b < ca->mi.first_bucket; b++)
- if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
- bucket_gens_nonzero_for_invalid_buckets,
+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
+ trans, bucket_gens_nonzero_for_invalid_buckets,
"bucket_gens key has nonzero gen for invalid bucket")) {
g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
need_update = true;
}
for (b = ca->mi.nbuckets; b < end; b++)
- if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
- bucket_gens_nonzero_for_invalid_buckets,
+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
+ trans, bucket_gens_nonzero_for_invalid_buckets,
"bucket_gens key has nonzero gen for invalid bucket")) {
g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
need_update = true;
@@ -1553,13 +1601,13 @@ err:
}
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
- struct btree_iter *alloc_iter)
+ struct btree_iter *alloc_iter,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- struct btree_iter lru_iter;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
- struct bkey_s_c alloc_k, lru_k;
+ struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1573,11 +1621,19 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = bch2_alloc_to_v4(alloc_k, &a_convert);
+ if (a->fragmentation_lru) {
+ ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
+ a->fragmentation_lru,
+ alloc_k, last_flushed);
+ if (ret)
+ return ret;
+ }
+
if (a->data_type != BCH_DATA_cached)
return 0;
- if (fsck_err_on(!a->io_time[READ], c,
- alloc_key_cached_but_read_time_zero,
+ if (fsck_err_on(!a->io_time[READ],
+ trans, alloc_key_cached_but_read_time_zero,
"cached bucket with read_time 0\n"
" %s",
(printbuf_reset(&buf),
@@ -1597,41 +1653,30 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}
- lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
- lru_pos(alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]), 0);
- ret = bkey_err(lru_k);
+ ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
+ alloc_k, last_flushed);
if (ret)
- return ret;
-
- if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
- alloc_key_to_missing_lru_entry,
- "missing lru entry\n"
- " %s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
- ret = bch2_lru_set(trans,
- alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]);
- if (ret)
- goto err;
- }
+ goto err;
err:
fsck_err:
- bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
+ struct bkey_buf last_flushed; /* single buffer handed to every per-key check in the loop below */
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k); /* initialise the buffer's key header before first use */
+
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter)));
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))); /* NOTE(review): presumably lets bch2_lru_check_set() remember state across keys -- confirm in lru.c */
+
+ bch2_bkey_buf_exit(&last_flushed, c); /* pairs with bch2_bkey_buf_init(); reached whether or not ret is nonzero */
bch_err_fn(c, ret);
return ret;
}
@@ -1962,7 +2007,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
return 0;
- a = bch2_trans_start_alloc_update(trans, bucket);
+ a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto out;
@@ -1983,6 +2028,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
a->v.gen++;
a->v.data_type = 0;
a->v.dirty_sectors = 0;
+ a->v.stripe_sectors = 0;
a->v.cached_sectors = 0;
a->v.io_time[READ] = bch2_current_io_time(c, READ);
a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE);
@@ -2338,6 +2384,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
reserved_sectors = min(reserved_sectors, capacity);
+ c->reserved = reserved_sectors;
c->capacity = capacity - reserved_sectors;
c->bucket_size_max = bucket_size_max;