aboutsummaryrefslogtreecommitdiff
path: root/fs/ocfs2/stack_o2cb.c
diff options
context:
space:
mode:
authorRafael J. Wysocki <rjw@sisk.pl>2011-12-21 09:51:23 +0100
committerRafael J. Wysocki <rjw@sisk.pl>2011-12-21 09:51:23 +0100
commitdfa9d178cd28caf5d76bc4f37f4b3e7e8df8e697 (patch)
treed97ca7790d8a622cba9fdd19832decdb0aa6418a /fs/ocfs2/stack_o2cb.c
parent3f19f08a7ec74cfc50fbad3c5e615760afbd23a0 (diff)
parent7b4050381127ae11fcfc74a106d715a5fbbf888a (diff)
Merge branch 'devfreq-for-next' of git://git.infradead.org/users/kmpark/linux-samsung into pm-devfreq
* 'devfreq-for-next' of git://git.infradead.org/users/kmpark/linux-samsung: (765 commits) PM/Devfreq: Add Exynos4-bus device DVFS driver for Exynos4210/4212/4412. pci: Fix hotplug of Express Module with pci bridges i2c-eg20t: correct the driver init order of pch_i2c_probe() I2C: OMAP: fix FIFO usage for OMAP4 i2c-s3c2410: Fix return code of s3c24xx_i2c_parse_dt_gpio i2c: i2c-s3c2410: Add a cpu_relax() to busy wait for bus idle Linux 3.2-rc6 Revert "drm/i915: fix infinite recursion on unbind due to ilk vt-d w/a" btrfs: lower the dirty balance poll interval drm/i915/dp: Dither down to 6bpc if it makes the mode fit drm/i915: enable semaphores on per-device defaults drm/i915: don't set unpin_work if vblank_get fails drm/i915: By default, enable RC6 on IVB and SNB when reasonable iommu: Export intel_iommu_enabled to signal when iommu is in use drm/i915/sdvo: Include LVDS panels for the IS_DIGITAL check drm/i915: prevent division by zero when asking for chipset power drm/i915: add PCH info to i915_capabilities drm/i915: set the right SDVO transcoder for CPT drm/i915: no-lvds quirk for ASUS AT5NM10T-I sched: Fix select_idle_sibling() regression in selecting an idle SMT sibling ...
Diffstat (limited to 'fs/ocfs2/stack_o2cb.c')
-rw-r--r--fs/ocfs2/stack_o2cb.c71
1 files changed, 63 insertions, 8 deletions
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 19965b00c43c..94368017edb3 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -28,6 +28,7 @@
#include "cluster/masklog.h"
#include "cluster/nodemanager.h"
#include "cluster/heartbeat.h"
+#include "cluster/tcp.h"
#include "stackglue.h"
@@ -256,6 +257,61 @@ static void o2cb_dump_lksb(struct ocfs2_dlm_lksb *lksb)
}
/*
+ * Check if this node is heartbeating and is connected to all other
+ * heartbeating nodes.
+ */
+static int o2cb_cluster_check(void)
+{
+ u8 node_num;
+ int i;
+ unsigned long hbmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ unsigned long netmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+
+ node_num = o2nm_this_node();
+ if (node_num == O2NM_MAX_NODES) {
+ printk(KERN_ERR "o2cb: This node has not been configured.\n");
+ return -EINVAL;
+ }
+
+ /*
+ * o2dlm expects o2net sockets to be created. If not, then
+ * dlm_join_domain() fails with a stack of errors which are both cryptic
+ * and incomplete. The idea here is to detect upfront whether we have
+ * managed to connect to all nodes or not. If not, then list the nodes
+ * to allow the user to check the configuration (incorrect IP, firewall,
+ * etc.) Yes, this is racy. But its not the end of the world.
+ */
+#define O2CB_MAP_STABILIZE_COUNT 60
+ for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) {
+ o2hb_fill_node_map(hbmap, sizeof(hbmap));
+ if (!test_bit(node_num, hbmap)) {
+ printk(KERN_ERR "o2cb: %s heartbeat has not been "
+ "started.\n", (o2hb_global_heartbeat_active() ?
+ "Global" : "Local"));
+ return -EINVAL;
+ }
+ o2net_fill_node_map(netmap, sizeof(netmap));
+ /* Force set the current node to allow easy compare */
+ set_bit(node_num, netmap);
+ if (!memcmp(hbmap, netmap, sizeof(hbmap)))
+ return 0;
+ if (i < O2CB_MAP_STABILIZE_COUNT)
+ msleep(1000);
+ }
+
+ printk(KERN_ERR "o2cb: This node could not connect to nodes:");
+ i = -1;
+ while ((i = find_next_bit(hbmap, O2NM_MAX_NODES,
+ i + 1)) < O2NM_MAX_NODES) {
+ if (!test_bit(i, netmap))
+ printk(" %u", i);
+ }
+ printk(".\n");
+
+ return -ENOTCONN;
+}
+
+/*
* Called from the dlm when it's about to evict a node. This is how the
* classic stack signals node death.
*/
@@ -263,8 +319,8 @@ static void o2dlm_eviction_cb(int node_num, void *data)
{
struct ocfs2_cluster_connection *conn = data;
- mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n",
- node_num, conn->cc_namelen, conn->cc_name);
+ printk(KERN_NOTICE "o2cb: o2dlm has evicted node %d from domain %.*s\n",
+ node_num, conn->cc_namelen, conn->cc_name);
conn->cc_recovery_handler(node_num, conn->cc_recovery_data);
}
@@ -280,12 +336,11 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
BUG_ON(conn == NULL);
BUG_ON(conn->cc_proto == NULL);
- /* for now we only have one cluster/node, make sure we see it
- * in the heartbeat universe */
- if (!o2hb_check_local_node_heartbeating()) {
- if (o2hb_global_heartbeat_active())
- mlog(ML_ERROR, "Global heartbeat not started\n");
- rc = -EINVAL;
+ /* Ensure cluster stack is up and all nodes are connected */
+ rc = o2cb_cluster_check();
+ if (rc) {
+ printk(KERN_ERR "o2cb: Cluster check failed. Fix errors "
+ "before retrying.\n");
goto out;
}