diff options
Diffstat (limited to 'drivers/nvme/host/core.c')
| -rw-r--r-- | drivers/nvme/host/core.c | 171 |
1 files changed, 96 insertions, 75 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f13eb4ded95f..0896e21642be 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -279,14 +279,13 @@ static blk_status_t nvme_error_status(u16 status) static void nvme_retry_req(struct request *req) { - struct nvme_ns *ns = req->q->queuedata; unsigned long delay = 0; u16 crd; /* The mask and shift result must be <= 3 */ crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11; - if (ns && crd) - delay = ns->ctrl->crdt[crd - 1] * 100; + if (crd) + delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100; nvme_req(req)->retries++; blk_mq_requeue_request(req, false); @@ -356,6 +355,21 @@ void nvme_complete_rq(struct request *req) } EXPORT_SYMBOL_GPL(nvme_complete_rq); +/* + * Called to unwind from ->queue_rq on a failed command submission so that the + * multipathing code gets called to potentially failover to another path. + * The caller needs to unwind all transport specific resource allocations and + * must return propagate the return value. + */ +blk_status_t nvme_host_path_error(struct request *req) +{ + nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; + blk_mq_set_request_complete(req); + nvme_complete_rq(req); + return BLK_STS_OK; +} +EXPORT_SYMBOL_GPL(nvme_host_path_error); + bool nvme_cancel_request(struct request *req, void *data, bool reserved) { dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, @@ -366,11 +380,32 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) return true; nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; blk_mq_complete_request(req); return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); +void nvme_cancel_tagset(struct nvme_ctrl *ctrl) +{ + if (ctrl->tagset) { + blk_mq_tagset_busy_iter(ctrl->tagset, + nvme_cancel_request, ctrl); + blk_mq_tagset_wait_completed_request(ctrl->tagset); + } +} +EXPORT_SYMBOL_GPL(nvme_cancel_tagset); + +void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl) +{ + if (ctrl->admin_tagset) { + blk_mq_tagset_busy_iter(ctrl->admin_tagset, + nvme_cancel_request, ctrl); + blk_mq_tagset_wait_completed_request(ctrl->admin_tagset); + } +} +EXPORT_SYMBOL_GPL(nvme_cancel_admin_tagset); + bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { @@ -842,11 +877,11 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, void nvme_cleanup_cmd(struct request *req) { if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { - struct nvme_ns *ns = req->rq_disk->private_data; + struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; struct page *page = req->special_vec.bv_page; - if (page == ns->ctrl->discard_page) - clear_bit_unlock(0, &ns->ctrl->discard_page_busy); + if (page == ctrl->discard_page) + clear_bit_unlock(0, &ctrl->discard_page_busy); else kfree(page_address(page) + req->special_vec.bv_offset); } @@ -925,7 +960,7 @@ static void nvme_execute_rq_polled(struct request_queue *q, rq->cmd_flags |= REQ_HIPRI; rq->end_io_data = &wait; - blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq); + blk_execute_rq_nowait(bd_disk, rq, at_head, nvme_end_sync_rq); while (!completion_done(&wait)) { blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true); @@ -964,7 +999,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, if (poll) nvme_execute_rq_polled(req->q, NULL, req, at_head); else - blk_execute_rq(req->q, NULL, req, at_head); + blk_execute_rq(NULL, req, at_head); if (result) *result = nvme_req(req)->result; if (nvme_req(req)->flags & NVME_REQ_CANCELLED) @@ -1101,7 +1136,7 @@ void nvme_execute_passthru_rq(struct request *rq) u32 effects; effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - blk_execute_rq(rq->q, disk, rq, 0); + blk_execute_rq(disk, rq, 0); nvme_passthru_end(ctrl, effects); } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); @@ -1113,7 +1148,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; - struct gendisk *disk = ns ? ns->disk : NULL; + struct block_device *bdev = ns ? ns->disk->part0 : NULL; struct request *req; struct bio *bio = NULL; void *meta = NULL; @@ -1133,8 +1168,9 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (ret) goto out; bio = req->bio; - bio->bi_disk = disk; - if (disk && meta_buffer && meta_len) { + if (bdev) + bio_set_dev(bio, bdev); + if (bdev && meta_buffer && meta_len) { meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, meta_seed, write); if (IS_ERR(meta)) { @@ -1190,28 +1226,12 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); } -static int nvme_keep_alive(struct nvme_ctrl *ctrl) -{ - struct request *rq; - - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, - BLK_MQ_REQ_RESERVED); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - rq->timeout = ctrl->kato * HZ; - rq->end_io_data = ctrl; - - blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io); - - return 0; -} - static void nvme_keep_alive_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_ctrl, ka_work); bool comp_seen = ctrl->comp_seen; + struct request *rq; if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { dev_dbg(ctrl->device, @@ -1221,12 +1241,18 @@ static void nvme_keep_alive_work(struct work_struct *work) return; } - if (nvme_keep_alive(ctrl)) { + rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + if (IS_ERR(rq)) { /* allocation failure, reset the controller */ - dev_err(ctrl->device, "keep-alive failed\n"); + dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq)); nvme_reset_ctrl(ctrl); return; } + + rq->timeout = ctrl->kato * HZ; + rq->end_io_data = ctrl; + blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io); } static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) @@ -1405,7 +1431,7 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_free_id; } - error = -ENODEV; + error = NVME_SC_INVALID_NS | NVME_SC_DNR; if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; @@ -1928,30 +1954,18 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) +/* + * Even though NVMe spec explicitly states that MDTS is not applicable to the + * write-zeroes, we are cautious and limit the size to the controllers + * max_hw_sectors value, which is based on the MDTS field and possibly other + * limiting factors. + */ +static void nvme_config_write_zeroes(struct request_queue *q, + struct nvme_ctrl *ctrl) { - u64 max_blocks; - - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) - return; - /* - * Even though NVMe spec explicitly states that MDTS is not - * applicable to the write-zeroes:- "The restriction does not apply to - * commands that do not transfer data between the host and the - * controller (e.g., Write Uncorrectable ro Write Zeroes command).". - * In order to be more cautious use controller's max_hw_sectors value - * to configure the maximum sectors for the write-zeroes which is - * configured based on the controller's MDTS field in the - * nvme_init_identify() if available. - */ - if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_blocks = (u64)USHRT_MAX + 1; - else - max_blocks = ns->ctrl->max_hw_sectors + 1; - - blk_queue_max_write_zeroes_sectors(disk->queue, - nvme_lba_to_sect(ns, max_blocks)); + if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) && + !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) + blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors); } static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) @@ -2123,11 +2137,10 @@ static void nvme_update_disk_info(struct gendisk *disk, set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); - nvme_config_write_zeroes(disk, ns); + nvme_config_write_zeroes(disk->queue, ns->ctrl); - if ((id->nsattr & NVME_NS_ATTR_RO) || - test_bit(NVME_NS_FORCE_RO, &ns->flags)) - set_disk_ro(disk, true); + set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); } static inline bool nvme_first_scan(struct gendisk *disk) @@ -2176,17 +2189,18 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); + ret = nvme_configure_metadata(ns, id); + if (ret) + goto out_unfreeze; + nvme_set_chunk_sectors(ns, id); + nvme_update_disk_info(ns->disk, ns, id); + if (ns->head->ids.csi == NVME_CSI_ZNS) { ret = nvme_update_zone_info(ns, lbaf); if (ret) goto out_unfreeze; } - ret = nvme_configure_metadata(ns, id); - if (ret) - goto out_unfreeze; - nvme_set_chunk_sectors(ns, id); - nvme_update_disk_info(ns->disk, ns, id); blk_mq_unfreeze_queue(ns->disk->queue); if (blk_queue_is_zoned(ns->queue)) { @@ -2830,7 +2844,7 @@ static ssize_t nvme_subsys_show_nqn(struct device *dev, struct nvme_subsystem *subsys = container_of(dev, struct nvme_subsystem, dev); - return snprintf(buf, PAGE_SIZE, "%s\n", subsys->subnqn); + return sysfs_emit(buf, "%s\n", subsys->subnqn); } static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); @@ -2860,7 +2874,7 @@ static struct attribute *nvme_subsys_attrs[] = { NULL, }; -static struct attribute_group nvme_subsys_attrs_group = { +static const struct attribute_group nvme_subsys_attrs_group = { .attrs = nvme_subsys_attrs, }; @@ -3523,7 +3537,7 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->ops->name); + return sysfs_emit(buf, "%s\n", ctrl->ops->name); } static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL); @@ -3557,7 +3571,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subsys->subnqn); + return sysfs_emit(buf, "%s\n", ctrl->subsys->subnqn); } static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); @@ -3567,7 +3581,7 @@ static ssize_t nvme_sysfs_show_hostnqn(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->opts->host->nqn); + return sysfs_emit(buf, "%s\n", ctrl->opts->host->nqn); } static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL); @@ -3577,7 +3591,7 @@ static ssize_t nvme_sysfs_show_hostid(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%pU\n", &ctrl->opts->host->id); + return sysfs_emit(buf, "%pU\n", &ctrl->opts->host->id); } static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL); @@ -3695,7 +3709,7 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return a->mode; } -static struct attribute_group nvme_dev_attrs_group = { +static const struct attribute_group nvme_dev_attrs_group = { .attrs = nvme_dev_attrs, .is_visible = nvme_dev_attrs_are_visible, }; @@ -4003,7 +4017,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) { struct nvme_id_ns *id; - int ret = -ENODEV; + int ret = NVME_SC_INVALID_NS | NVME_SC_DNR; if (test_bit(NVME_NS_DEAD, &ns->flags)) goto out; @@ -4012,7 +4026,7 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) if (ret) goto out; - ret = -ENODEV; + ret = NVME_SC_INVALID_NS | NVME_SC_DNR; if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { dev_err(ns->ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); @@ -4030,7 +4044,7 @@ out: * * TODO: we should probably schedule a delayed retry here. */ - if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) + if (ret > 0 && (ret & NVME_SC_DNR)) nvme_ns_remove(ns); } @@ -4060,6 +4074,12 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nsid); break; } + if (!nvme_multi_css(ctrl)) { + dev_warn(ctrl->device, + "command set not reported for nsid: %d\n", + nsid); + break; + } nvme_alloc_ns(ctrl, nsid, &ids); break; default: @@ -4438,6 +4458,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { + nvme_hwmon_exit(ctrl); nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); cdev_device_del(&ctrl->cdev, ctrl->device); @@ -4450,7 +4471,7 @@ static void nvme_free_cels(struct nvme_ctrl *ctrl) struct nvme_effects_log *cel; unsigned long i; - xa_for_each (&ctrl->cels, i, cel) { + xa_for_each(&ctrl->cels, i, cel) { xa_erase(&ctrl->cels, i); kfree(cel); } |