aboutsummaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_locking.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/btree_locking.c')
-rw-r--r--fs/bcachefs/btree_locking.c216
1 files changed, 147 insertions, 69 deletions
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 40c8ed8f7bf1..f2caf491957e 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -32,13 +32,14 @@ struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
{
struct btree_path *path;
struct six_lock_count ret;
+ unsigned i;
memset(&ret, 0, sizeof(ret));
if (IS_ERR_OR_NULL(b))
return ret;
- trans_for_each_path(trans, path)
+ trans_for_each_path(trans, path, i)
if (path != skip && &path->l[level].b->c == b) {
int t = btree_node_locked_type(path, level);
@@ -85,8 +86,14 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
prt_printf(out, "Found lock cycle (%u entries):", g->nr);
prt_newline(out);
- for (i = g->g; i < g->g + g->nr; i++)
+ for (i = g->g; i < g->g + g->nr; i++) {
+ struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
+ if (!task)
+ continue;
+
bch2_btree_trans_to_text(out, i->trans);
+ bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1, GFP_NOWAIT);
+ }
}
static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
@@ -94,9 +101,10 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
struct trans_waiting_for_lock *i;
for (i = g->g; i != g->g + g->nr; i++) {
+ struct task_struct *task = i->trans->locking_wait.task;
if (i != g->g)
prt_str(out, "<- ");
- prt_printf(out, "%u ", i->trans->locking_wait.task->pid);
+ prt_printf(out, "%u ", task ?task->pid : 0);
}
prt_newline(out);
}
@@ -142,10 +150,27 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g)
return false;
}
+static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+
+ count_event(c, trans_restart_would_deadlock);
+
+ if (trace_trans_restart_would_deadlock_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ buf.atomic++;
+ print_cycle(&buf, g);
+
+ trace_trans_restart_would_deadlock(trans, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
if (i == g->g) {
- trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
+ trace_would_deadlock(g, i->trans);
return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
} else {
i->trans->lock_must_abort = true;
@@ -202,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
prt_printf(&buf, "backtrace:");
prt_newline(&buf);
printbuf_indent_add(&buf, 2);
- bch2_prt_task_backtrace(&buf, trans->locking_wait.task);
+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
}
@@ -262,27 +287,40 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
struct lock_graph g;
struct trans_waiting_for_lock *top;
struct btree_bkey_cached_common *b;
- struct btree_path *path;
- unsigned path_idx;
- int ret;
+ btree_path_idx_t path_idx;
+ int ret = 0;
+
+ g.nr = 0;
if (trans->lock_must_abort) {
if (cycle)
return -1;
- trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_);
+ trace_would_deadlock(&g, trans);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
}
- g.nr = 0;
lock_graph_down(&g, trans);
+
+ /* trans->paths is rcu protected vs. freeing */
+ rcu_read_lock();
+ if (cycle)
+ cycle->atomic++;
next:
if (!g.nr)
- return 0;
+ goto out;
top = &g.g[g.nr - 1];
- trans_for_each_path_safe_from(top->trans, path, path_idx, top->path_idx) {
+ struct btree_path *paths = rcu_dereference(top->trans->paths);
+ if (!paths)
+ goto up;
+
+ unsigned long *paths_allocated = trans_paths_allocated(paths);
+
+ trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths),
+ path_idx, top->path_idx) {
+ struct btree_path *path = paths + path_idx;
if (!path->nodes_locked)
continue;
@@ -348,18 +386,23 @@ next:
ret = lock_graph_descend(&g, trans, cycle);
if (ret)
- return ret;
+ goto out;
goto next;
}
raw_spin_unlock(&b->lock.wait_lock);
}
}
-
+up:
if (g.nr > 1 && cycle)
print_chain(cycle, &g);
lock_graph_up(&g);
goto next;
+out:
+ if (cycle)
+ --cycle->atomic;
+ rcu_read_unlock();
+ return ret;
}
int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
@@ -397,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b)
{
- struct btree_path *linked;
- unsigned i;
- int ret;
-
- /*
- * XXX BIG FAT NOTICE
- *
- * Drop all read locks before taking a write lock:
- *
- * This is a hack, because bch2_btree_node_lock_write_nofail() is a
- * hack - but by dropping read locks first, this should never fail, and
- * we only use this in code paths where whatever read locks we've
- * already taken are no longer needed:
- */
-
- trans_for_each_path(trans, linked) {
- if (!linked->nodes_locked)
- continue;
-
- for (i = 0; i < BTREE_MAX_DEPTH; i++)
- if (btree_node_read_locked(linked, i)) {
- btree_node_unlock(trans, linked, i);
- btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
- }
- }
-
- ret = __btree_node_lock_write(trans, path, b, true);
+ int ret = __btree_node_lock_write(trans, path, b, true);
BUG_ON(ret);
}
@@ -431,7 +448,8 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
static inline bool btree_path_get_locks(struct btree_trans *trans,
struct btree_path *path,
- bool upgrade)
+ bool upgrade,
+ struct get_locks_fail *f)
{
unsigned l = path->level;
int fail_idx = -1;
@@ -442,8 +460,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
if (!(upgrade
? bch2_btree_node_upgrade(trans, path, l)
- : bch2_btree_node_relock(trans, path, l)))
- fail_idx = l;
+ : bch2_btree_node_relock(trans, path, l))) {
+ fail_idx = l;
+
+ if (f) {
+ f->l = l;
+ f->b = path->l[l].b;
+ }
+ }
l++;
} while (l < path->locks_want);
@@ -581,16 +605,17 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans,
}
__flatten
-bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
- struct btree_path *path, unsigned long trace_ip)
+bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path)
{
- return btree_path_get_locks(trans, path, false);
+ struct get_locks_fail f;
+
+ return btree_path_get_locks(trans, path, false, &f);
}
int __bch2_btree_path_relock(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
- if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
+ if (!bch2_btree_path_relock_norestart(trans, path)) {
trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
}
@@ -600,22 +625,22 @@ int __bch2_btree_path_relock(struct btree_trans *trans,
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
struct btree_path *path,
- unsigned new_locks_want)
+ unsigned new_locks_want,
+ struct get_locks_fail *f)
{
EBUG_ON(path->locks_want >= new_locks_want);
path->locks_want = new_locks_want;
- return btree_path_get_locks(trans, path, true);
+ return btree_path_get_locks(trans, path, true, f);
}
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
struct btree_path *path,
- unsigned new_locks_want)
+ unsigned new_locks_want,
+ struct get_locks_fail *f)
{
- struct btree_path *linked;
-
- if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
+ if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
return true;
/*
@@ -637,15 +662,19 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
* before interior nodes - now that's handled by
* bch2_btree_path_traverse_all().
*/
- if (!path->cached && !trans->in_traverse_all)
- trans_for_each_path(trans, linked)
+ if (!path->cached && !trans->in_traverse_all) {
+ struct btree_path *linked;
+ unsigned i;
+
+ trans_for_each_path(trans, linked, i)
if (linked != path &&
linked->cached == path->cached &&
linked->btree_id == path->btree_id &&
linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want;
- btree_path_get_locks(trans, linked, true);
+ btree_path_get_locks(trans, linked, true, NULL);
}
+ }
return false;
}
@@ -654,7 +683,10 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
struct btree_path *path,
unsigned new_locks_want)
{
- unsigned l;
+ unsigned l, old_locks_want = path->locks_want;
+
+ if (trans->restarted)
+ return;
EBUG_ON(path->locks_want < new_locks_want);
@@ -674,6 +706,8 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
}
bch2_btree_path_verify_locks(path);
+
+ trace_path_downgrade(trans, _RET_IP_, path, old_locks_want);
}
/* Btree transaction locking: */
@@ -681,37 +715,71 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
void bch2_trans_downgrade(struct btree_trans *trans)
{
struct btree_path *path;
+ unsigned i;
- trans_for_each_path(trans, path)
- bch2_btree_path_downgrade(trans, path);
+ if (trans->restarted)
+ return;
+
+ trans_for_each_path(trans, path, i)
+ if (path->ref)
+ bch2_btree_path_downgrade(trans, path);
}
int bch2_trans_relock(struct btree_trans *trans)
{
struct btree_path *path;
+ unsigned i;
if (unlikely(trans->restarted))
return -((int) trans->restarted);
- trans_for_each_path(trans, path)
+ trans_for_each_path(trans, path, i) {
+ struct get_locks_fail f;
+
if (path->should_be_locked &&
- !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
- trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path);
+ !btree_path_get_locks(trans, path, false, &f)) {
+ if (trace_trans_restart_relock_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_printf(&buf, " l=%u seq=%u node seq=",
+ f.l, path->l[f.l].lock_seq);
+ if (IS_ERR_OR_NULL(f.b)) {
+ prt_str(&buf, bch2_err_str(PTR_ERR(f.b)));
+ } else {
+ prt_printf(&buf, "%u", f.b->c.lock.seq);
+
+ struct six_lock_count c =
+ bch2_btree_node_lock_counts(trans, NULL, &f.b->c, f.l);
+ prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
+
+ c = six_lock_counts(&f.b->c.lock);
+ prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
+ }
+
+ trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ count_event(trans->c, trans_restart_relock);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
}
+ }
+
return 0;
}
int bch2_trans_relock_notrace(struct btree_trans *trans)
{
struct btree_path *path;
+ unsigned i;
if (unlikely(trans->restarted))
return -((int) trans->restarted);
- trans_for_each_path(trans, path)
+ trans_for_each_path(trans, path, i)
if (path->should_be_locked &&
- !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
+ !bch2_btree_path_relock_norestart(trans, path)) {
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
}
return 0;
@@ -720,24 +788,33 @@ int bch2_trans_relock_notrace(struct btree_trans *trans)
void bch2_trans_unlock_noassert(struct btree_trans *trans)
{
struct btree_path *path;
+ unsigned i;
- trans_for_each_path(trans, path)
+ trans_for_each_path(trans, path, i)
__bch2_btree_path_unlock(trans, path);
}
void bch2_trans_unlock(struct btree_trans *trans)
{
struct btree_path *path;
+ unsigned i;
- trans_for_each_path(trans, path)
+ trans_for_each_path(trans, path, i)
__bch2_btree_path_unlock(trans, path);
}
+void bch2_trans_unlock_long(struct btree_trans *trans)
+{
+ bch2_trans_unlock(trans);
+ bch2_trans_srcu_unlock(trans);
+}
+
bool bch2_trans_locked(struct btree_trans *trans)
{
struct btree_path *path;
+ unsigned i;
- trans_for_each_path(trans, path)
+ trans_for_each_path(trans, path, i)
if (path->nodes_locked)
return true;
return false;
@@ -783,8 +860,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path)
void bch2_trans_verify_locks(struct btree_trans *trans)
{
struct btree_path *path;
+ unsigned i;
- trans_for_each_path(trans, path)
+ trans_for_each_path(trans, path, i)
bch2_btree_path_verify_locks(path);
}