aboutsummaryrefslogtreecommitdiff
path: root/net/ceph/osdmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r--net/ceph/osdmap.c214
1 files changed, 162 insertions, 52 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index cde52e94732f..d2436880b305 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1261,6 +1261,115 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
}
/*
+ * Encoding order is (new_up_client, new_state, new_weight). Need to
+ * apply in the (new_weight, new_state, new_up_client) order, because
+ * an incremental map may look like e.g.
+ *
+ * new_up_client: { osd=6, addr=... } # set osd_state and addr
+ * new_state: { osd=6, xorstate=EXISTS } # clear osd_state
+ */
+static int decode_new_up_state_weight(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ void *new_up_client;
+ void *new_state;
+ void *new_weight_end;
+ u32 len;
+
+ new_up_client = *p;
+ ceph_decode_32_safe(p, end, len, e_inval);
+ len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
+ ceph_decode_need(p, end, len, e_inval);
+ *p += len;
+
+ new_state = *p;
+ ceph_decode_32_safe(p, end, len, e_inval);
+ len *= sizeof(u32) + sizeof(u8);
+ ceph_decode_need(p, end, len, e_inval);
+ *p += len;
+
+ /* new_weight */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ s32 osd;
+ u32 w;
+
+ ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
+ osd = ceph_decode_32(p);
+ w = ceph_decode_32(p);
+ BUG_ON(osd >= map->max_osd);
+ pr_info("osd%d weight 0x%x %s\n", osd, w,
+ w == CEPH_OSD_IN ? "(in)" :
+ (w == CEPH_OSD_OUT ? "(out)" : ""));
+ map->osd_weight[osd] = w;
+
+ /*
+ * If we are marking in, set the EXISTS, and clear the
+ * AUTOOUT and NEW bits.
+ */
+ if (w) {
+ map->osd_state[osd] |= CEPH_OSD_EXISTS;
+ map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
+ CEPH_OSD_NEW);
+ }
+ }
+ new_weight_end = *p;
+
+ /* new_state (up/down) */
+ *p = new_state;
+ len = ceph_decode_32(p);
+ while (len--) {
+ s32 osd;
+ u8 xorstate;
+ int ret;
+
+ osd = ceph_decode_32(p);
+ xorstate = ceph_decode_8(p);
+ if (xorstate == 0)
+ xorstate = CEPH_OSD_UP;
+ BUG_ON(osd >= map->max_osd);
+ if ((map->osd_state[osd] & CEPH_OSD_UP) &&
+ (xorstate & CEPH_OSD_UP))
+ pr_info("osd%d down\n", osd);
+ if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
+ (xorstate & CEPH_OSD_EXISTS)) {
+ pr_info("osd%d does not exist\n", osd);
+ map->osd_weight[osd] = CEPH_OSD_IN;
+ ret = set_primary_affinity(map, osd,
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
+ if (ret)
+ return ret;
+ memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
+ map->osd_state[osd] = 0;
+ } else {
+ map->osd_state[osd] ^= xorstate;
+ }
+ }
+
+ /* new_up_client */
+ *p = new_up_client;
+ len = ceph_decode_32(p);
+ while (len--) {
+ s32 osd;
+ struct ceph_entity_addr addr;
+
+ osd = ceph_decode_32(p);
+ ceph_decode_copy(p, &addr, sizeof(addr));
+ ceph_decode_addr(&addr);
+ BUG_ON(osd >= map->max_osd);
+ pr_info("osd%d up\n", osd);
+ map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
+ map->osd_addr[osd] = addr;
+ }
+
+ *p = new_weight_end;
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
* decode and apply an incremental map update.
*/
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
@@ -1358,49 +1467,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
__remove_pg_pool(&map->pg_pools, pi);
}
- /* new_up */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd;
- struct ceph_entity_addr addr;
- ceph_decode_32_safe(p, end, osd, e_inval);
- ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
- ceph_decode_addr(&addr);
- pr_info("osd%d up\n", osd);
- BUG_ON(osd >= map->max_osd);
- map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS;
- map->osd_addr[osd] = addr;
- }
-
- /* new_state */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd;
- u8 xorstate;
- ceph_decode_32_safe(p, end, osd, e_inval);
- xorstate = **(u8 **)p;
- (*p)++; /* clean flag */
- if (xorstate == 0)
- xorstate = CEPH_OSD_UP;
- if (xorstate & CEPH_OSD_UP)
- pr_info("osd%d down\n", osd);
- if (osd < map->max_osd)
- map->osd_state[osd] ^= xorstate;
- }
-
- /* new_weight */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd, off;
- ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
- osd = ceph_decode_32(p);
- off = ceph_decode_32(p);
- pr_info("osd%d weight 0x%x %s\n", osd, off,
- off == CEPH_OSD_IN ? "(in)" :
- (off == CEPH_OSD_OUT ? "(out)" : ""));
- if (osd < map->max_osd)
- map->osd_weight[osd] = off;
- }
+ /* new_up_client, new_state, new_weight */
+ err = decode_new_up_state_weight(p, end, map);
+ if (err)
+ goto bad;
/* new_pg_temp */
err = decode_new_pg_temp(p, end, map);
@@ -1440,6 +1510,24 @@ bad:
return ERR_PTR(err);
}
+void ceph_oloc_copy(struct ceph_object_locator *dest,
+ const struct ceph_object_locator *src)
+{
+ WARN_ON(!ceph_oloc_empty(dest));
+ WARN_ON(dest->pool_ns); /* empty() only covers ->pool */
+
+ dest->pool = src->pool;
+ if (src->pool_ns)
+ dest->pool_ns = ceph_get_string(src->pool_ns);
+}
+EXPORT_SYMBOL(ceph_oloc_copy);
+
+void ceph_oloc_destroy(struct ceph_object_locator *oloc)
+{
+ ceph_put_string(oloc->pool_ns);
+}
+EXPORT_SYMBOL(ceph_oloc_destroy);
+
void ceph_oid_copy(struct ceph_object_id *dest,
const struct ceph_object_id *src)
{
@@ -1700,9 +1788,9 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u64 *ono,
u64 *oxoff, u64 *oxlen)
{
- u32 osize = le32_to_cpu(layout->fl_object_size);
- u32 su = le32_to_cpu(layout->fl_stripe_unit);
- u32 sc = le32_to_cpu(layout->fl_stripe_count);
+ u32 osize = layout->object_size;
+ u32 su = layout->stripe_unit;
+ u32 sc = layout->stripe_count;
u32 bl, stripeno, stripepos, objsetno;
u32 su_per_object;
u64 t, su_offset;
@@ -1774,12 +1862,34 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
if (!pi)
return -ENOENT;
- raw_pgid->pool = oloc->pool;
- raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name,
- oid->name_len);
-
- dout("%s %*pE -> raw_pgid %llu.%x\n", __func__, oid->name_len,
- oid->name, raw_pgid->pool, raw_pgid->seed);
+ if (!oloc->pool_ns) {
+ raw_pgid->pool = oloc->pool;
+ raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name,
+ oid->name_len);
+ dout("%s %s -> raw_pgid %llu.%x\n", __func__, oid->name,
+ raw_pgid->pool, raw_pgid->seed);
+ } else {
+ char stack_buf[256];
+ char *buf = stack_buf;
+ int nsl = oloc->pool_ns->len;
+ size_t total = nsl + 1 + oid->name_len;
+
+ if (total > sizeof(stack_buf)) {
+ buf = kmalloc(total, GFP_NOIO);
+ if (!buf)
+ return -ENOMEM;
+ }
+ memcpy(buf, oloc->pool_ns->str, nsl);
+ buf[nsl] = '\037';
+ memcpy(buf + nsl + 1, oid->name, oid->name_len);
+ raw_pgid->pool = oloc->pool;
+ raw_pgid->seed = ceph_str_hash(pi->object_hash, buf, total);
+ if (buf != stack_buf)
+ kfree(buf);
+ dout("%s %s ns %.*s -> raw_pgid %llu.%x\n", __func__,
+ oid->name, nsl, oloc->pool_ns->str,
+ raw_pgid->pool, raw_pgid->seed);
+ }
return 0;
}
EXPORT_SYMBOL(ceph_object_locator_to_pg);