aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: piaojun <[email protected]> 2016-08-02 14:02:13 -0700
committer: Linus Torvalds <[email protected]> 2016-08-02 17:31:41 -0400
commit: 86b652b93adb57d8fed8edd532ed2eb8a791950d (patch)
tree: c1ffef69a234dc18a177652ac46daee05cebba0c
parent: 2070ad1aebfff2c26190188844c38e55d2df2ae2 (diff)
ocfs2/dlm: disable BUG_ON when DLM_LOCK_RES_DROPPING_REF is cleared before dlm_deref_lockres_done_handler
We found a BUG situation in which DLM_LOCK_RES_DROPPING_REF is cleared unexpectedly, as described below. To solve the bug, we disable the BUG_ON and purge lockres in dlm_do_local_recovery_cleanup.

Sequence of events (Node 2 is the master):

  Node 1:          dlm_purge_lockres
  Node 2 (master): dlm_deref_lockres_handler
  Node 2 (master): DLM_LOCK_RES_SETREF_INPROG is set
  Node 2 (master): response DLM_DEREF_RESPONSE_INPROG
  Node 1:          receive DLM_DEREF_RESPONSE_INPROG
  Node 1:          stop purging in dlm_purge_lockres and wait for DLM_DEREF_RESPONSE_DONE
  Node 2 (master): dispatch dlm_deref_lockres_worker
  Node 2 (master): response DLM_DEREF_RESPONSE_DONE
  Node 1:          receive DLM_DEREF_RESPONSE_DONE and prepare to purge lockres
  Node 2 (master): Node 2 goes down
  Node 1:          find Node2 down and do local clean up for Node2:
                   dlm_do_local_recovery_cleanup
                     -> clear DLM_LOCK_RES_DROPPING_REF
  Node 1:          when purging lockres, BUG_ON happens because DLM_LOCK_RES_DROPPING_REF is clear:
                   dlm_deref_lockres_done_handler
                     -> BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF));

[[email protected]: fix duplicated write to `ret']

Fixes: 60d663cb5273 ("ocfs2/dlm: add DEREF_DONE message")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Jun Piao <[email protected]>
Reviewed-by: Joseph Qi <[email protected]>
Reviewed-by: Jiufei Xue <[email protected]>
Reviewed-by: Mark Fasheh <[email protected]>
Cc: Joel Becker <[email protected]>
Cc: Junxiao Bi <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 13
1 files changed, 11 insertions, 2 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 13719d3f35f8..525dc06468c4 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2416,7 +2416,17 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
}
spin_lock(&res->spinlock);
- BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF));
+ if (!(res->state & DLM_LOCK_RES_DROPPING_REF)) {
+ spin_unlock(&res->spinlock);
+ spin_unlock(&dlm->spinlock);
+ mlog(ML_NOTICE, "%s:%.*s: node %u sends deref done "
+ "but it is already derefed!\n", dlm->name,
+ res->lockname.len, res->lockname.name, node);
+ dlm_lockres_put(res);
+ ret = 0;
+ goto done;
+ }
+
if (!list_empty(&res->purge)) {
mlog(0, "%s: Removing res %.*s from purgelist\n",
dlm->name, res->lockname.len, res->lockname.name);
@@ -2456,7 +2466,6 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&dlm->spinlock);
ret = 0;
-
done:
dlm_put(dlm);
return ret;