aboutsummaryrefslogtreecommitdiff
path: root/fs/bcachefs/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/super.c')
-rw-r--r--fs/bcachefs/super.c132
1 files changed, 73 insertions, 59 deletions
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index df2bea38e83f..0455a1001fec 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -25,6 +25,7 @@
#include "clock.h"
#include "compress.h"
#include "debug.h"
+#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
#include "errcode.h"
@@ -88,6 +89,19 @@ const char * const bch2_fs_flag_strs[] = {
NULL
};
+void bch2_print_str(struct bch_fs *c, const char *str)
+{
+#ifdef __KERNEL__
+ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
+
+ if (unlikely(stdio)) {
+ bch2_stdio_redirect_printf(stdio, true, "%s", str);
+ return;
+ }
+#endif
+ bch2_print_string_as_lines(KERN_ERR, str);
+}
+
__printf(2, 0)
static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args)
{
@@ -222,22 +236,6 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
return c;
}
-static void bch2_dev_usage_journal_reserve(struct bch_fs *c)
-{
- unsigned nr = 0, u64s =
- ((sizeof(struct jset_entry_dev_usage) +
- sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR)) /
- sizeof(u64);
-
- rcu_read_lock();
- for_each_member_device_rcu(c, ca, NULL)
- nr++;
- rcu_read_unlock();
-
- bch2_journal_entry_res_resize(&c->journal,
- &c->dev_usage_journal_res, u64s * nr);
-}
-
/* Filesystem RO/RW: */
/*
@@ -376,6 +374,7 @@ void bch2_fs_read_only(struct bch_fs *c)
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
BUG_ON(c->btree_write_buffer.inc.keys.nr);
BUG_ON(c->btree_write_buffer.flushing.keys.nr);
+ bch2_verify_accounting_clean(c);
bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c);
@@ -536,7 +535,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
- bch2_fs_allocator_background_exit(c);
+ bch2_fs_accounting_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
@@ -564,11 +563,15 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
- EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
- free_percpu(c->online_reserved);
+ if (c->online_reserved) {
+ u64 v = percpu_u64_get(c->online_reserved);
+ WARN(v, "online_reserved not 0 at shutdown: %lli", v);
+ free_percpu(c->online_reserved);
+ }
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
+ free_percpu(c->usage);
mempool_exit(&c->large_bkey_pool);
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
@@ -582,8 +585,10 @@ static void __bch2_fs_free(struct bch_fs *c)
if (c->write_ref_wq)
destroy_workqueue(c->write_ref_wq);
- if (c->io_complete_wq)
- destroy_workqueue(c->io_complete_wq);
+ if (c->btree_write_submit_wq)
+ destroy_workqueue(c->btree_write_submit_wq);
+ if (c->btree_read_complete_wq)
+ destroy_workqueue(c->btree_read_complete_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
if (c->btree_io_complete_wq)
@@ -785,8 +790,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->list);
- mutex_init(&c->usage_scratch_lock);
-
mutex_init(&c->bio_bounce_pages_lock);
mutex_init(&c->snapshot_table_lock);
init_rwsem(&c->snapshot_create_lock);
@@ -878,8 +881,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
- !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
+ !(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
+ !(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
+ WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)) ||
#ifndef BCH_WRITE_REF_DEBUG
@@ -892,6 +897,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
offsetof(struct btree_write_bio, wbio.bio)),
BIOSET_NEED_BVECS) ||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
+ !(c->usage = alloc_percpu(struct bch_fs_usage_base)) ||
!(c->online_reserved = alloc_percpu(u64)) ||
mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
c->opts.btree_node_size) ||
@@ -907,10 +913,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_io_clock_init(&c->io_clock[READ]) ?:
bch2_io_clock_init(&c->io_clock[WRITE]) ?:
bch2_fs_journal_init(&c->journal) ?:
- bch2_fs_replicas_init(c) ?:
+ bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
- bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
@@ -927,17 +932,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret)
goto err;
- for (i = 0; i < c->sb.nr_devices; i++)
- if (bch2_member_exists(c->disk_sb.sb, i) &&
- bch2_dev_alloc(c, i)) {
- ret = -EEXIST;
+ for (i = 0; i < c->sb.nr_devices; i++) {
+ if (!bch2_member_exists(c->disk_sb.sb, i))
+ continue;
+ ret = bch2_dev_alloc(c, i);
+ if (ret)
goto err;
- }
+ }
bch2_journal_entry_res_resize(&c->journal,
&c->btree_root_journal_res,
BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX));
- bch2_dev_usage_journal_reserve(c);
bch2_journal_entry_res_resize(&c->journal,
&c->clock_journal_res,
(sizeof(struct jset_entry_clock) / sizeof(u64)) * 2);
@@ -963,7 +968,7 @@ static void print_mount_opts(struct bch_fs *c)
struct printbuf p = PRINTBUF;
bool first = true;
- prt_str(&p, "mounting version ");
+ prt_str(&p, "starting version ");
bch2_version_to_text(&p, c->sb.version);
if (c->opts.read_only) {
@@ -1190,6 +1195,7 @@ static void bch2_dev_free(struct bch_dev *ca)
kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
+ bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
free_percpu(ca->io_done);
@@ -1312,6 +1318,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
atomic_long_set(&ca->ref, 1);
#endif
+ bch2_dev_allocator_background_init(ca);
+
if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
@@ -1524,6 +1532,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
* The allocator thread itself allocates btree nodes, so stop it first:
*/
bch2_dev_allocator_remove(c, ca);
+ bch2_recalc_capacity(c);
bch2_dev_journal_stop(&c->journal, ca);
}
@@ -1535,6 +1544,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ bch2_dev_do_discards(ca);
}
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
@@ -1603,7 +1613,8 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
BTREE_TRIGGER_norun, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
- BTREE_TRIGGER_norun, NULL);
+ BTREE_TRIGGER_norun, NULL) ?:
+ bch2_dev_usage_remove(c, ca->dev_idx);
bch_err_msg(c, ret, "removing dev alloc info");
return ret;
}
@@ -1640,6 +1651,16 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
if (ret)
goto err;
+ /*
+ * We need to flush the entire journal to get rid of keys that reference
+ * the device being removed before removing the superblock entry
+ */
+ bch2_journal_flush_all_pins(&c->journal);
+
+ /*
+ * this is really just needed for the bch2_replicas_gc_(start|end)
+ * calls, and could be cleaned up:
+ */
ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
bch_err_msg(ca, ret, "bch2_journal_flush_device_pins()");
if (ret)
@@ -1683,17 +1704,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
bch2_dev_free(ca);
/*
- * At this point the device object has been removed in-core, but the
- * on-disk journal might still refer to the device index via sb device
- * usage entries. Recovery fails if it sees usage information for an
- * invalid device. Flush journal pins to push the back of the journal
- * past now invalid device index references before we update the
- * superblock, but after the device object has been removed so any
- * further journal writes elide usage info for the device.
- */
- bch2_journal_flush_all_pins(&c->journal);
-
- /*
* Free this device's slot in the bch_member array - all pointers to
* this device must be gone:
*/
@@ -1705,8 +1715,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
mutex_unlock(&c->sb_lock);
up_write(&c->state_lock);
-
- bch2_dev_usage_journal_reserve(c);
return 0;
err:
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
@@ -1754,13 +1762,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err;
}
- bch2_dev_usage_init(ca);
-
ret = __bch2_dev_attach_bdev(ca, &sb);
if (ret)
goto err;
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, true);
bch_err_msg(c, ret, "allocating journal");
if (ret)
goto err;
@@ -1837,7 +1843,9 @@ have_slot:
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- bch2_dev_usage_journal_reserve(c);
+ ret = bch2_dev_usage_init(ca, false);
+ if (ret)
+ goto err_late;
ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional);
bch_err_msg(ca, ret, "marking new superblock");
@@ -1920,7 +1928,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
}
if (!ca->journal.nr) {
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, false);
bch_err_msg(ca, ret, "allocating journal");
if (ret)
goto err;
@@ -2009,15 +2017,18 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
mutex_unlock(&c->sb_lock);
if (ca->mi.freespace_initialized) {
- ret = bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets);
+ struct disk_accounting_pos acc = {
+ .type = BCH_DISK_ACCOUNTING_dev_data_type,
+ .dev_data_type.dev = ca->dev_idx,
+ .dev_data_type.data_type = BCH_DATA_free,
+ };
+ u64 v[3] = { nbuckets - old_nbuckets, 0, 0 };
+
+ ret = bch2_trans_do(ca->fs, NULL, NULL, 0,
+ bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), false)) ?:
+ bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets);
if (ret)
goto err;
-
- /*
- * XXX: this is all wrong transactionally - we'll be able to do
- * this correctly after the disk space accounting rewrite
- */
- ca->usage_base->d[BCH_DATA_free].buckets += nbuckets - old_nbuckets;
}
bch2_recalc_capacity(c);
@@ -2029,6 +2040,9 @@ err:
/* return with ref on ca->ref: */
struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
{
+ if (!strncmp(name, "/dev/", strlen("/dev/")))
+ name += strlen("/dev/");
+
for_each_member_device(c, ca)
if (!strcmp(name, ca->name))
return ca;