diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2020-11-13 18:36:33 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:49 -0400 |
commit | ebb84d094141eac9ee3e22d95abc9792a1c79eca (patch) | |
tree | 4d5e66377dd2a124a626bad434c46c8d7f8e67b8 /fs/bcachefs/journal.c | |
parent | 5db43418d5097b8aca5c725eb301186dee04c70a (diff) |
bcachefs: Increase journal pipelining
This patch increases the maximum number of journal buffers in flight from
2 to 4. This will be particularly helpful in the future, when we stop
requiring flush+FUA for every journal write.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/journal.c')
-rw-r--r-- | fs/bcachefs/journal.c | 143 |
1 file changed, 88 insertions, 55 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 0cd868c8248b..ac2dddd90c31 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -23,7 +23,7 @@ static u64 last_unwritten_seq(struct journal *j) lockdep_assert_held(&j->lock); - return journal_cur_seq(j) - s.prev_buf_unwritten; + return journal_cur_seq(j) - ((s.idx - s.unwritten_idx) & JOURNAL_BUF_MASK); } static inline bool journal_seq_unwritten(struct journal *j, u64 seq) @@ -51,7 +51,7 @@ journal_seq_to_buf(struct journal *j, u64 seq) j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL); if (journal_seq_unwritten(j, seq)) { - buf = j->buf + (seq & 1); + buf = j->buf + (seq & JOURNAL_BUF_MASK); EBUG_ON(le64_to_cpu(buf->data->seq) != seq); } return buf; @@ -108,15 +108,8 @@ void bch2_journal_halt(struct journal *j) /* journal entry close/open: */ -void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set) +void __bch2_journal_buf_put(struct journal *j) { - if (!need_write_just_set && - test_bit(JOURNAL_NEED_WRITE, &j->flags)) - bch2_time_stats_update(j->delay_time, - j->need_write_time); - - clear_bit(JOURNAL_NEED_WRITE, &j->flags); - closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL); } @@ -129,7 +122,6 @@ static bool __journal_entry_close(struct journal *j) struct journal_buf *buf = journal_cur_buf(j); union journal_res_state old, new; u64 v = atomic64_read(&j->reservations.counter); - bool set_need_write = false; unsigned sectors; lockdep_assert_held(&j->lock); @@ -148,15 +140,13 @@ static bool __journal_entry_close(struct journal *j) if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) { set_bit(JOURNAL_NEED_WRITE, &j->flags); j->need_write_time = local_clock(); - set_need_write = true; } - if (new.prev_buf_unwritten) - return false; - new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL; new.idx++; - new.prev_buf_unwritten = 1; + + if (new.idx == new.unwritten_idx) + return false; BUG_ON(journal_state_count(new, new.idx)); } while ((v = 
atomic64_cmpxchg(&j->reservations.counter, @@ -190,24 +180,44 @@ static bool __journal_entry_close(struct journal *j) */ buf->data->last_seq = cpu_to_le64(journal_last_seq(j)); + __bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq)); + journal_pin_new_entry(j, 1); bch2_journal_buf_init(j); cancel_delayed_work(&j->write_work); + clear_bit(JOURNAL_NEED_WRITE, &j->flags); bch2_journal_space_available(j); - bch2_journal_buf_put(j, old.idx, set_need_write); + bch2_journal_buf_put(j, old.idx); return true; } +static bool journal_entry_want_write(struct journal *j) +{ + union journal_res_state s = READ_ONCE(j->reservations); + bool ret = false; + + /* + * Don't close it yet if we already have a write in flight, but do set + * NEED_WRITE: + */ + if (s.idx != s.unwritten_idx) + set_bit(JOURNAL_NEED_WRITE, &j->flags); + else + ret = __journal_entry_close(j); + + return ret; +} + static bool journal_entry_close(struct journal *j) { bool ret; spin_lock(&j->lock); - ret = __journal_entry_close(j); + ret = journal_entry_want_write(j); spin_unlock(&j->lock); return ret; @@ -289,8 +299,8 @@ static int journal_entry_open(struct journal *j) static bool journal_quiesced(struct journal *j) { - union journal_res_state state = READ_ONCE(j->reservations); - bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state); + union journal_res_state s = READ_ONCE(j->reservations); + bool ret = s.idx == s.unwritten_idx && !__journal_entry_is_open(s); if (!ret) journal_entry_close(j); @@ -317,17 +327,29 @@ static void journal_write_work(struct work_struct *work) u64 bch2_inode_journal_seq(struct journal *j, u64 inode) { size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); - u64 seq = 0; + union journal_res_state s; + unsigned i; + u64 seq; - if (!test_bit(h, j->buf[0].has_inode) && - !test_bit(h, j->buf[1].has_inode)) - return 0; spin_lock(&j->lock); - if (test_bit(h, journal_cur_buf(j)->has_inode)) - seq = journal_cur_seq(j); - else if (test_bit(h, 
journal_prev_buf(j)->has_inode)) - seq = journal_cur_seq(j) - 1; + seq = journal_cur_seq(j); + s = READ_ONCE(j->reservations); + i = s.idx; + + while (1) { + if (test_bit(h, j->buf[i].has_inode)) + goto out; + + if (i == s.unwritten_idx) + break; + + i = (i - 1) & JOURNAL_BUF_MASK; + seq--; + } + + seq = 0; +out: spin_unlock(&j->lock); return seq; @@ -574,7 +596,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, BUG(); if (seq == journal_cur_seq(j)) - __journal_entry_close(j); + journal_entry_want_write(j); out: spin_unlock(&j->lock); return ret; @@ -863,15 +885,18 @@ int bch2_dev_journal_alloc(struct bch_dev *ca) static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) { union journal_res_state state; - struct journal_buf *w; - bool ret; + bool ret = false; + unsigned i; spin_lock(&j->lock); state = READ_ONCE(j->reservations); - w = j->buf + !state.idx; + i = state.idx; - ret = state.prev_buf_unwritten && - bch2_bkey_has_device(bkey_i_to_s_c(&w->key), dev_idx); + while (i != state.unwritten_idx) { + i = (i - 1) & JOURNAL_BUF_MASK; + if (bch2_bkey_has_device(bkey_i_to_s_c(&j->buf[i].key), dev_idx)) + ret = true; + } spin_unlock(&j->lock); return ret; @@ -957,7 +982,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq, journal_pin_new_entry(j, 1); - j->reservations.idx = journal_cur_seq(j); + j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j); bch2_journal_buf_init(j); @@ -1015,8 +1040,10 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) void bch2_fs_journal_exit(struct journal *j) { - kvpfree(j->buf[1].data, j->buf[1].buf_size); - kvpfree(j->buf[0].data, j->buf[0].buf_size); + unsigned i; + + for (i = 0; i < ARRAY_SIZE(j->buf); i++) + kvpfree(j->buf[i].data, j->buf[i].buf_size); free_fifo(&j->pin); } @@ -1024,6 +1051,7 @@ int bch2_fs_journal_init(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); static struct lock_class_key res_key; + unsigned i; 
int ret = 0; pr_verbose_init(c->opts, ""); @@ -1038,8 +1066,6 @@ int bch2_fs_journal_init(struct journal *j) lockdep_init_map(&j->res_map, "journal res", &res_key, 0); - j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN; - j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN; j->write_delay_ms = 1000; j->reclaim_delay_ms = 100; @@ -1051,13 +1077,20 @@ int bch2_fs_journal_init(struct journal *j) ((union journal_res_state) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); - if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || - !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) || - !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) { + if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) { ret = -ENOMEM; goto out; } + for (i = 0; i < ARRAY_SIZE(j->buf); i++) { + j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN; + j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL); + if (!j->buf[i].data) { + ret = -ENOMEM; + goto out; + } + } + j->pin.front = j->pin.back = 1; out: pr_verbose_init(c->opts, "ret %i", ret); @@ -1071,7 +1104,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) struct bch_fs *c = container_of(j, struct bch_fs, journal); union journal_res_state s; struct bch_dev *ca; - unsigned iter; + unsigned i; rcu_read_lock(); spin_lock(&j->lock); @@ -1114,16 +1147,16 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) } pr_buf(out, - "current entry refs:\t%u\n" - "prev entry unwritten:\t", - journal_state_count(s, s.idx)); - - if (s.prev_buf_unwritten) - pr_buf(out, "yes, ref %u sectors %u\n", - journal_state_count(s, !s.idx), - journal_prev_buf(j)->sectors); - else - pr_buf(out, "no\n"); + "current entry:\tidx %u refcount %u\n", + s.idx, journal_state_count(s, s.idx)); + + i = s.idx; + while (i != s.unwritten_idx) { + i = (i - 1) & JOURNAL_BUF_MASK; + + pr_buf(out, "unwritten entry:\tidx %u refcount %u sectors %u\n", + i, journal_state_count(s, i), j->buf[i].sectors); + } pr_buf(out, "need 
write:\t\t%i\n" @@ -1131,7 +1164,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) test_bit(JOURNAL_NEED_WRITE, &j->flags), test_bit(JOURNAL_REPLAY_DONE, &j->flags)); - for_each_member_device_rcu(ca, c, iter, + for_each_member_device_rcu(ca, c, i, &c->rw_devs[BCH_DATA_journal]) { struct journal_device *ja = &ca->journal; @@ -1146,7 +1179,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) "\tdirty_idx_ondisk\t%u (seq %llu)\n" "\tdirty_idx\t\t%u (seq %llu)\n" "\tcur_idx\t\t%u (seq %llu)\n", - iter, ja->nr, + i, ja->nr, bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free, ja->discard_idx, |