diff options
Diffstat (limited to 'net/ceph')
-rw-r--r-- | net/ceph/osd_client.c | 51 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 160 |
2 files changed, 140 insertions, 71 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0160d7d09a1e..89469592076c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1276,9 +1276,9 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc, const struct ceph_osd_request_target *t, struct ceph_pg_pool_info *pi) { - bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); - bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + bool pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); + bool pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || __pool_full(pi); WARN_ON(pi->id != t->base_oloc.pool); @@ -1303,8 +1303,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, bool force_resend = false; bool need_check_tiering = false; bool need_resend = false; - bool sort_bitwise = ceph_osdmap_flag(osdc->osdmap, - CEPH_OSDMAP_SORTBITWISE); + bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); enum calc_target_result ct_res; int ret; @@ -1540,9 +1539,9 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg) */ msg->hdr.data_off = cpu_to_le16(req->r_data_offset); - dout("%s req %p oid %*pE oid_len %d front %zu data %u\n", __func__, - req, req->r_t.target_oid.name_len, req->r_t.target_oid.name, - req->r_t.target_oid.name_len, msg->front.iov_len, data_len); + dout("%s req %p oid %s oid_len %d front %zu data %u\n", __func__, + req, req->r_t.target_oid.name, req->r_t.target_oid.name_len, + msg->front.iov_len, data_len); } /* @@ -1590,9 +1589,9 @@ static void maybe_request_map(struct ceph_osd_client *osdc) verify_osdc_locked(osdc); WARN_ON(!osdc->osdmap->epoch); - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { dout("%s osdc %p continuous\n", __func__, osdc); continuous = true; } else { @@ -1629,19 +1628,19 @@ again: } if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR)) { + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { dout("req %p pausewr\n", req); req->r_t.paused = true; maybe_request_map(osdc); } else if ((req->r_flags & CEPH_OSD_FLAG_READ) && - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD)) { + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("req %p pauserd\n", req); req->r_t.paused = true; maybe_request_map(osdc); } else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && !(req->r_flags & (CEPH_OSD_FLAG_FULL_TRY | CEPH_OSD_FLAG_FULL_FORCE)) && - (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || pool_full(osdc, req->r_t.base_oloc.pool))) { dout("req %p full/pool_full\n", req); pr_warn_ratelimited("FULL or reached pool quota\n"); @@ -2280,7 +2279,7 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq) struct ceph_osd_request *req = lreq->ping_req; struct ceph_osd_req_op *op = &req->r_ops[0]; - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("%s PAUSERD\n", __func__); return; } @@ -2893,6 +2892,9 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) dout("req %p tid %llu cb\n", req, req->r_tid); __complete_request(req); } + if (m.flags & CEPH_OSD_FLAG_ONDISK) + complete_all(&req->r_safe_completion); + ceph_osdc_put_request(req); } else { if (req->r_unsafe_callback) { dout("req %p tid %llu unsafe-cb\n", req, req->r_tid); @@ -2901,10 +2903,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) WARN_ON(1); } } - if (m.flags & CEPH_OSD_FLAG_ONDISK) - complete_all(&req->r_safe_completion); - ceph_osdc_put_request(req); return; fail_request: @@ -3050,7 +3049,7 @@ static int handle_one_map(struct ceph_osd_client *osdc, bool skipped_map = false; bool was_full; - was_full = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL); + was_full = ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); set_pool_was_full(osdc); if (incremental) @@ -3088,7 +3087,7 @@ static int handle_one_map(struct ceph_osd_client *osdc, osdc->osdmap = newmap; } - was_full &= !ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL); + was_full &= !ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); scan_requests(&osdc->homeless_osd, skipped_map, was_full, true, need_resend, need_resend_linger); @@ -3174,9 +3173,9 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) if (ceph_check_fsid(osdc->client, &fsid) < 0) goto bad; - was_pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); - was_pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + was_pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); + was_pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc); /* incremental maps */ @@ -3238,9 +3237,9 @@ done: * we find out when we are no longer full and stop returning * ENOSPC. */ - pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); - pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); + pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc); if (was_pauserd || was_pausewr || pauserd || pausewr) maybe_request_map(osdc); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index cde52e94732f..7e480bf75bcf 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1261,6 +1261,115 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) } /* + * Encoding order is (new_up_client, new_state, new_weight). Need to + * apply in the (new_weight, new_state, new_up_client) order, because + * an incremental map may look like e.g. + * + * new_up_client: { osd=6, addr=... } # set osd_state and addr + * new_state: { osd=6, xorstate=EXISTS } # clear osd_state + */ +static int decode_new_up_state_weight(void **p, void *end, + struct ceph_osdmap *map) +{ + void *new_up_client; + void *new_state; + void *new_weight_end; + u32 len; + + new_up_client = *p; + ceph_decode_32_safe(p, end, len, e_inval); + len *= sizeof(u32) + sizeof(struct ceph_entity_addr); + ceph_decode_need(p, end, len, e_inval); + *p += len; + + new_state = *p; + ceph_decode_32_safe(p, end, len, e_inval); + len *= sizeof(u32) + sizeof(u8); + ceph_decode_need(p, end, len, e_inval); + *p += len; + + /* new_weight */ + ceph_decode_32_safe(p, end, len, e_inval); + while (len--) { + s32 osd; + u32 w; + + ceph_decode_need(p, end, 2*sizeof(u32), e_inval); + osd = ceph_decode_32(p); + w = ceph_decode_32(p); + BUG_ON(osd >= map->max_osd); + pr_info("osd%d weight 0x%x %s\n", osd, w, + w == CEPH_OSD_IN ? "(in)" : + (w == CEPH_OSD_OUT ? "(out)" : "")); + map->osd_weight[osd] = w; + + /* + * If we are marking in, set the EXISTS, and clear the + * AUTOOUT and NEW bits. + */ + if (w) { + map->osd_state[osd] |= CEPH_OSD_EXISTS; + map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT | + CEPH_OSD_NEW); + } + } + new_weight_end = *p; + + /* new_state (up/down) */ + *p = new_state; + len = ceph_decode_32(p); + while (len--) { + s32 osd; + u8 xorstate; + int ret; + + osd = ceph_decode_32(p); + xorstate = ceph_decode_8(p); + if (xorstate == 0) + xorstate = CEPH_OSD_UP; + BUG_ON(osd >= map->max_osd); + if ((map->osd_state[osd] & CEPH_OSD_UP) && + (xorstate & CEPH_OSD_UP)) + pr_info("osd%d down\n", osd); + if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && + (xorstate & CEPH_OSD_EXISTS)) { + pr_info("osd%d does not exist\n", osd); + map->osd_weight[osd] = CEPH_OSD_IN; + ret = set_primary_affinity(map, osd, + CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); + if (ret) + return ret; + memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr)); + map->osd_state[osd] = 0; + } else { + map->osd_state[osd] ^= xorstate; + } + } + + /* new_up_client */ + *p = new_up_client; + len = ceph_decode_32(p); + while (len--) { + s32 osd; + struct ceph_entity_addr addr; + + osd = ceph_decode_32(p); + ceph_decode_copy(p, &addr, sizeof(addr)); + ceph_decode_addr(&addr); + BUG_ON(osd >= map->max_osd); + pr_info("osd%d up\n", osd); + map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; + map->osd_addr[osd] = addr; + } + + *p = new_weight_end; + return 0; + +e_inval: + return -EINVAL; +} + +/* * decode and apply an incremental map update. */ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, @@ -1358,49 +1467,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, __remove_pg_pool(&map->pg_pools, pi); } - /* new_up */ - ceph_decode_32_safe(p, end, len, e_inval); - while (len--) { - u32 osd; - struct ceph_entity_addr addr; - ceph_decode_32_safe(p, end, osd, e_inval); - ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval); - ceph_decode_addr(&addr); - pr_info("osd%d up\n", osd); - BUG_ON(osd >= map->max_osd); - map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS; - map->osd_addr[osd] = addr; - } - - /* new_state */ - ceph_decode_32_safe(p, end, len, e_inval); - while (len--) { - u32 osd; - u8 xorstate; - ceph_decode_32_safe(p, end, osd, e_inval); - xorstate = **(u8 **)p; - (*p)++; /* clean flag */ - if (xorstate == 0) - xorstate = CEPH_OSD_UP; - if (xorstate & CEPH_OSD_UP) - pr_info("osd%d down\n", osd); - if (osd < map->max_osd) - map->osd_state[osd] ^= xorstate; - } - - /* new_weight */ - ceph_decode_32_safe(p, end, len, e_inval); - while (len--) { - u32 osd, off; - ceph_decode_need(p, end, sizeof(u32)*2, e_inval); - osd = ceph_decode_32(p); - off = ceph_decode_32(p); - pr_info("osd%d weight 0x%x %s\n", osd, off, - off == CEPH_OSD_IN ? "(in)" : - (off == CEPH_OSD_OUT ? "(out)" : "")); - if (osd < map->max_osd) - map->osd_weight[osd] = off; - } + /* new_up_client, new_state, new_weight */ + err = decode_new_up_state_weight(p, end, map); + if (err) + goto bad; /* new_pg_temp */ err = decode_new_pg_temp(p, end, map); @@ -1778,8 +1848,8 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name, oid->name_len); - dout("%s %*pE -> raw_pgid %llu.%x\n", __func__, oid->name_len, - oid->name, raw_pgid->pool, raw_pgid->seed); + dout("%s %s -> raw_pgid %llu.%x\n", __func__, oid->name, + raw_pgid->pool, raw_pgid->seed); return 0; } EXPORT_SYMBOL(ceph_object_locator_to_pg); |