Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--  drivers/nvme/host/Kconfig   |   5
-rw-r--r--  drivers/nvme/host/auth.c    |   5
-rw-r--r--  drivers/nvme/host/core.c    | 110
-rw-r--r--  drivers/nvme/host/fabrics.c |   2
-rw-r--r--  drivers/nvme/host/fc.c      |  46
-rw-r--r--  drivers/nvme/host/ioctl.c   |  21
-rw-r--r--  drivers/nvme/host/nvme.h    |  11
-rw-r--r--  drivers/nvme/host/pci.c     |  30
-rw-r--r--  drivers/nvme/host/rdma.c    |  24
-rw-r--r--  drivers/nvme/host/tcp.c     |  59
10 files changed, 191 insertions, 122 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 8fe2dd619e80..b309c8be720f 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -107,11 +107,12 @@ config NVME_TCP_TLS
 	  If unsure, say N.
 
 config NVME_HOST_AUTH
-	bool "NVM Express over Fabrics In-Band Authentication"
+	bool "NVMe over Fabrics In-Band Authentication in host side"
 	depends on NVME_CORE
 	select NVME_AUTH
 	help
-	  This provides support for NVMe over Fabrics In-Band Authentication.
+	  This provides support for NVMe over Fabrics In-Band Authentication in
+	  host side.
 
 	  If unsure, say N.
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 48328e36e93b..72c0525c75f5 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -757,12 +757,11 @@ static void nvme_queue_auth_work(struct work_struct *work)
 		__func__, chap->qid);
 	mutex_lock(&ctrl->dhchap_auth_mutex);
 	ret = nvme_auth_dhchap_setup_host_response(ctrl, chap);
+	mutex_unlock(&ctrl->dhchap_auth_mutex);
 	if (ret) {
-		mutex_unlock(&ctrl->dhchap_auth_mutex);
 		chap->error = ret;
 		goto fail2;
 	}
-	mutex_unlock(&ctrl->dhchap_auth_mutex);
 
 	/* DH-HMAC-CHAP Step 3: send reply */
 	dev_dbg(ctrl->device, "%s: qid %d send reply\n",
@@ -839,6 +838,8 @@ static void nvme_queue_auth_work(struct work_struct *work)
 	}
 
 fail2:
+	if (chap->status == 0)
+		chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
 	dev_dbg(ctrl->device, "%s: qid %d send failure2, status %x\n",
 		__func__, chap->qid, chap->status);
 	tl = nvme_auth_set_dhchap_failure2_data(ctrl, chap);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 88b54cdcbd68..60f14019f981 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
 	/*
 	 * Only new queue scan work when admin and IO queues are both alive
 	 */
-	if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
+	if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
 		queue_work(nvme_wq, &ctrl->scan_work);
 }
 
@@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
  */
 int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
 {
-	if (ctrl->state != NVME_CTRL_RESETTING)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
 		return -EBUSY;
 	if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
 		return -EBUSY;
@@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work)
 	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvme_ctrl, failfast_work);
 
-	if (ctrl->state != NVME_CTRL_CONNECTING)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
 		return;
 
 	set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
@@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 	ret = nvme_reset_ctrl(ctrl);
 	if (!ret) {
 		flush_work(&ctrl->reset_work);
-		if (ctrl->state != NVME_CTRL_LIVE)
+		if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
 			ret = -ENETRESET;
 	}
 
@@ -482,7 +482,6 @@ EXPORT_SYMBOL_GPL(nvme_cancel_tagset);
 
 void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl)
 {
-	nvme_stop_keep_alive(ctrl);
 	if (ctrl->admin_tagset) {
 		blk_mq_tagset_busy_iter(ctrl->admin_tagset,
 				nvme_cancel_request, ctrl);
@@ -500,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 
 	spin_lock_irqsave(&ctrl->lock, flags);
 
-	old_state = ctrl->state;
+	old_state = nvme_ctrl_state(ctrl);
 	switch (new_state) {
 	case NVME_CTRL_LIVE:
 		switch (old_state) {
@@ -568,7 +567,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	}
 
 	if (changed) {
-		ctrl->state = new_state;
+		WRITE_ONCE(ctrl->state, new_state);
 		wake_up_all(&ctrl->state_wq);
 	}
 
@@ -576,11 +575,11 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	if (!changed)
 		return false;
 
-	if (ctrl->state == NVME_CTRL_LIVE) {
+	if (new_state == NVME_CTRL_LIVE) {
 		if (old_state == NVME_CTRL_CONNECTING)
 			nvme_stop_failfast_work(ctrl);
 		nvme_kick_requeue_lists(ctrl);
-	} else if (ctrl->state == NVME_CTRL_CONNECTING &&
+	} else if (new_state == NVME_CTRL_CONNECTING &&
 		old_state == NVME_CTRL_RESETTING) {
 		nvme_start_failfast_work(ctrl);
 	}
@@ -593,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
  */
 static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
 {
-	switch (ctrl->state) {
+	switch (nvme_ctrl_state(ctrl)) {
 	case NVME_CTRL_NEW:
 	case NVME_CTRL_LIVE:
 	case NVME_CTRL_RESETTING:
@@ -618,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
 	wait_event(ctrl->state_wq,
 		   nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
 		   nvme_state_terminal(ctrl));
-	return ctrl->state == NVME_CTRL_RESETTING;
+	return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
 }
 EXPORT_SYMBOL_GPL(nvme_wait_reset);
 
@@ -705,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
 blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
 		struct request *rq)
 {
-	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
-	    ctrl->state != NVME_CTRL_DELETING &&
-	    ctrl->state != NVME_CTRL_DEAD &&
+	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+	if (state != NVME_CTRL_DELETING_NOIO &&
+	    state != NVME_CTRL_DELETING &&
+	    state != NVME_CTRL_DEAD &&
 	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
@@ -737,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 	 * command, which is require to set the queue live in the
 	 * appropinquate states.
 	 */
-	switch (ctrl->state) {
+	switch (nvme_ctrl_state(ctrl)) {
 	case NVME_CTRL_CONNECTING:
 		if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
 		    (req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -1193,8 +1194,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
 
 static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 {
-	queue_delayed_work(nvme_wq, &ctrl->ka_work,
-			   nvme_keep_alive_work_period(ctrl));
+	unsigned long now = jiffies;
+	unsigned long delay = nvme_keep_alive_work_period(ctrl);
+	unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay;
+
+	if (time_after(now, ka_next_check_tm))
+		delay = 0;
+	else
+		delay = ka_next_check_tm - now;
+
+	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 }
 
 static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1480,7 +1489,8 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 	if (id->ncap == 0) {
 		/* namespace not allocated or attached */
 		info->is_removed = true;
-		return -ENODEV;
+		ret = -ENODEV;
+		goto error;
 	}
 
 	info->anagrpid = id->anagrpid;
@@ -1498,8 +1508,10 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
 			memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
 	}
+
+error:
 	kfree(id);
-	return 0;
+	return ret;
 }
 
 static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
@@ -1814,16 +1826,18 @@ set_pi:
 	return ret;
 }
 
-static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
+	int ret;
 
-	if (nvme_init_ms(ns, id))
-		return;
+	ret = nvme_init_ms(ns, id);
+	if (ret)
+		return ret;
 
 	ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
 	if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
-		return;
+		return 0;
 
 	if (ctrl->ops->flags & NVME_F_FABRICS) {
 		/*
@@ -1832,7 +1846,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		 * remap the separate metadata buffer from the block layer.
 		 */
 		if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
-			return;
+			return 0;
 
 		ns->features |= NVME_NS_EXT_LBAS;
 
@@ -1859,6 +1873,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		else
 			ns->features |= NVME_NS_METADATA_SUPPORTED;
 	}
+	return 0;
 }
 
 static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
@@ -1888,9 +1903,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 
 	/*
 	 * The block layer can't support LBA sizes larger than the page size
-	 * yet, so catch this early and don't allow block I/O.
+	 * or smaller than a sector size yet, so catch this early and don't
+	 * allow block I/O.
 	 */
-	if (ns->lba_shift > PAGE_SHIFT) {
+	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
 		capacity = 0;
 		bs = (1 << 9);
 	}
@@ -2027,12 +2043,23 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (ret)
 		return ret;
 
+	if (id->ncap == 0) {
+		/* namespace not allocated or attached */
+		info->is_removed = true;
+		ret = -ENODEV;
+		goto error;
+	}
+
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
 	ns->lba_shift = id->lbaf[lbaf].ds;
 	nvme_set_queue_limits(ns->ctrl, ns->queue);
 
-	nvme_configure_metadata(ns, id);
+	ret = nvme_configure_metadata(ns, id);
+	if (ret < 0) {
+		blk_mq_unfreeze_queue(ns->disk->queue);
+		goto out;
+	}
 	nvme_set_chunk_sectors(ns, id);
 	nvme_update_disk_info(ns->disk, ns, id);
 
@@ -2084,6 +2111,8 @@ out:
 		set_bit(NVME_NS_READY, &ns->flags);
 		ret = 0;
 	}
+
+error:
 	kfree(id);
 	return ret;
 }
@@ -2523,7 +2552,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
 
 	if (ctrl->ps_max_latency_us != latency) {
 		ctrl->ps_max_latency_us = latency;
-		if (ctrl->state == NVME_CTRL_LIVE)
+		if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
 			nvme_configure_apst(ctrl);
 	}
 }
@@ -3211,7 +3240,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
 	struct nvme_ctrl *ctrl =
 		container_of(inode->i_cdev, struct nvme_ctrl, cdev);
 
-	switch (ctrl->state) {
+	switch (nvme_ctrl_state(ctrl)) {
 	case NVME_CTRL_LIVE:
 		break;
 	default:
@@ -3633,6 +3662,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 		goto out_unlink_ns;
 
 	down_write(&ctrl->namespaces_rwsem);
+	/*
+	 * Ensure that no namespaces are added to the ctrl list after the queues
+	 * are frozen, thereby avoiding a deadlock between scan and reset.
+	 */
+	if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
+		up_write(&ctrl->namespaces_rwsem);
+		goto out_unlink_ns;
+	}
 	nvme_ns_add_to_ctrl_list(ns);
 	up_write(&ctrl->namespaces_rwsem);
 	nvme_get_ctrl(ctrl);
@@ -3897,7 +3934,7 @@ static void nvme_scan_work(struct work_struct *work)
 	int ret;
 
 	/* No tagset on a live ctrl means IO queues could not created */
-	if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
 		return;
 
 	/*
@@ -3967,7 +4004,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	 * removing the namespaces' disks; fail all the queues now to avoid
 	 * potentially having to clean up the failed sync later.
 	 */
-	if (ctrl->state == NVME_CTRL_DEAD)
+	if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
 		nvme_mark_namespaces_dead(ctrl);
 
 	/* this is a no-op when called from the controller reset handler */
@@ -4049,7 +4086,7 @@ static void nvme_async_event_work(struct work_struct *work)
 	 * flushing ctrl async_event_work after changing the controller state
 	 * from LIVE and before freeing the admin queue.
	 */
-	if (ctrl->state == NVME_CTRL_LIVE)
+	if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
 		ctrl->ops->submit_async_event(ctrl);
 }
 
@@ -4100,6 +4137,8 @@ static void nvme_fw_act_work(struct work_struct *work)
 		struct nvme_ctrl, fw_act_work);
 	unsigned long fw_act_timeout;
 
+	nvme_auth_stop(ctrl);
+
 	if (ctrl->mtfa)
 		fw_act_timeout = jiffies + msecs_to_jiffies(ctrl->mtfa * 100);
@@ -4155,7 +4194,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 		 * firmware activation.
 		 */
 		if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
-			nvme_auth_stop(ctrl);
 			requeue = false;
 			queue_work(nvme_wq, &ctrl->fw_act_work);
 		}
@@ -4348,6 +4386,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
 	nvme_mpath_stop(ctrl);
 	nvme_auth_stop(ctrl);
+	nvme_stop_keep_alive(ctrl);
 	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
@@ -4443,7 +4482,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 {
 	int ret;
 
-	ctrl->state = NVME_CTRL_NEW;
+	WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
 	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
@@ -4464,6 +4503,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+	ctrl->ka_last_check_time = jiffies;
 
 	BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
 			PAGE_SIZE);
@@ -4552,6 +4592,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_unfreeze_queue(ns->queue);
 	up_read(&ctrl->namespaces_rwsem);
+	clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
 
@@ -4585,6 +4626,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
+	set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_freeze_queue_start(ns->queue);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 4673ead69c5f..aa88606a44c4 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -667,8 +667,10 @@ static const match_table_t opt_tokens = {
 #endif
 	{ NVMF_OPT_FAIL_FAST_TMO,	"fast_io_fail_tmo=%d" },
 	{ NVMF_OPT_DISCOVERY,		"discovery" },
+#ifdef CONFIG_NVME_HOST_AUTH
 	{ NVMF_OPT_DHCHAP_SECRET,	"dhchap_secret=%s" },
 	{ NVMF_OPT_DHCHAP_CTRL_SECRET,	"dhchap_ctrl_secret=%s" },
+#endif
 #ifdef CONFIG_NVME_TCP_TLS
 	{ NVMF_OPT_TLS,			"tls" },
 #endif
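The nvme_queue_keep_alive_work() change in core.c above stops the keep-alive deadline from sliding: the next run is computed from ka_last_check_time rather than from the moment the work is requeued, so heavy command traffic can no longer starve the keep-alive. Below is a minimal standalone sketch of that arithmetic; jiffies, time_after() and the field name are plain-C stand-ins for the kernel versions, and the numbers in main() are invented for illustration.

#include <stdio.h>

/* wrap-safe "a is after b", modeled on include/linux/jiffies.h */
static int time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/* stand-in for ctrl->ka_last_check_time, set when a keep-alive completes */
static unsigned long ka_last_check_time;

static unsigned long keep_alive_delay(unsigned long now, unsigned long period)
{
	unsigned long next = ka_last_check_time + period;

	/* deadline already passed: fire immediately */
	if (time_after(now, next))
		return 0;
	return next - now;
}

int main(void)
{
	ka_last_check_time = 1000;

	/* early in the period: wait out the remainder -> 400 */
	printf("delay = %lu\n", keep_alive_delay(1100, 500));
	/* requeued after the deadline: run at once -> 0 */
	printf("delay = %lu\n", keep_alive_delay(1600, 500));
	return 0;
}

Because time_after() compares signed differences, the computation stays correct even when the tick counter wraps around.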
Awaiting " "Reconnect", ctrl->cnum); - switch (ctrl->ctrl.state) { + switch (nvme_ctrl_state(&ctrl->ctrl)) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: /* @@ -2530,12 +2530,6 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ nvme_quiesce_admin_queue(&ctrl->ctrl); - - /* - * Open-coding nvme_cancel_admin_tagset() as fc - * is not using nvme_cancel_request(). - */ - nvme_stop_keep_alive(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); @@ -2554,24 +2548,17 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - enum nvme_ctrl_state state; - unsigned long flags; - - spin_lock_irqsave(&ctrl->lock, flags); - state = ctrl->ctrl.state; - if (state == NVME_CTRL_CONNECTING) { - set_bit(ASSOC_FAILED, &ctrl->flags); - spin_unlock_irqrestore(&ctrl->lock, flags); + if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); + set_bit(ASSOC_FAILED, &ctrl->flags); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); return; } - spin_unlock_irqrestore(&ctrl->lock, flags); /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (state != NVME_CTRL_LIVE) + if (ctrl->ctrl.state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, @@ -3138,11 +3125,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) nvme_unquiesce_admin_queue(&ctrl->ctrl); ret = nvme_init_ctrl_finish(&ctrl->ctrl, false); - if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) - ret = -EIO; if (ret) goto out_disconnect_admin_queue; - + if (test_bit(ASSOC_FAILED, &ctrl->flags)) { + ret = -EIO; + goto out_stop_keep_alive; + } /* sanity checks */ /* FC-NVME does not have other data in the capsule */ @@ -3150,7 +3138,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", ctrl->ctrl.icdoff); ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_disconnect_admin_queue; + goto out_stop_keep_alive; } /* FC-NVME supports normal SGL Data Block Descriptors */ @@ -3158,7 +3146,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) dev_err(ctrl->ctrl.device, "Mandatory sgls are not supported!\n"); ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_disconnect_admin_queue; + goto out_stop_keep_alive; } if (opts->queue_size > ctrl->ctrl.maxcmd) { @@ -3185,16 +3173,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } - - spin_lock_irqsave(&ctrl->lock, flags); if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) ret = -EIO; - if (ret) { - spin_unlock_irqrestore(&ctrl->lock, flags); + if (ret) goto out_term_aen_ops; - } + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - spin_unlock_irqrestore(&ctrl->lock, flags); ctrl->ctrl.nr_reconnects = 0; @@ -3205,6 +3189,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); +out_stop_keep_alive: + nvme_stop_keep_alive(&ctrl->ctrl); out_disconnect_admin_queue: dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n", @@ -3322,7 +3308,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; bool recon = true; - if (ctrl->ctrl.state != 
NVME_CTRL_CONNECTING) + if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING) return; if (portptr->port_state == FC_OBJSTATE_ONLINE) { diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 529b9954d2b8..4939ed35638f 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -18,15 +18,12 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, { u32 effects; - if (capable(CAP_SYS_ADMIN)) - return true; - /* * Do not allow unprivileged passthrough on partitions, as that allows an * escape from the containment of the partition. */ if (flags & NVME_IOCTL_PARTITION) - return false; + goto admin; /* * Do not allow unprivileged processes to send vendor specific or fabrics @@ -34,7 +31,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, */ if (c->common.opcode >= nvme_cmd_vendor_start || c->common.opcode == nvme_fabrics_command) - return false; + goto admin; /* * Do not allow unprivileged passthrough of admin commands except @@ -53,7 +50,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, return true; } } - return false; + goto admin; } /* @@ -63,7 +60,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, */ effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); if (!(effects & NVME_CMD_EFFECTS_CSUPP)) - return false; + goto admin; /* * Don't allow passthrough for command that have intrusive (or unknown) @@ -72,16 +69,20 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_UUID_SEL | NVME_CMD_EFFECTS_SCOPE_MASK)) - return false; + goto admin; /* * Only allow I/O commands that transfer data to the controller or that * change the logical block contents if the file descriptor is open for * writing. */ - if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) - return open_for_write; + if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && + !open_for_write) + goto admin; + return true; +admin: + return capable(CAP_SYS_ADMIN); } /* diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 39a90b7cb125..e7411dac00f7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -156,6 +156,11 @@ enum nvme_quirks { * No temperature thresholds for channels other than 0 (Composite). */ NVME_QUIRK_NO_SECONDARY_TEMP_THRESH = (1 << 19), + + /* + * Disables simple suspend/resume path. + */ + NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND = (1 << 20), }; /* @@ -251,6 +256,7 @@ enum nvme_ctrl_flags { NVME_CTRL_STOPPED = 3, NVME_CTRL_SKIP_ID_CNS_CS = 4, NVME_CTRL_DIRTY_CAPABILITY = 5, + NVME_CTRL_FROZEN = 6, }; struct nvme_ctrl { @@ -387,6 +393,11 @@ struct nvme_ctrl { enum nvme_dctype dctype; }; +static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) +{ + return READ_ONCE(ctrl->state); +} + enum nvme_iopolicy { NVME_IOPOLICY_NUMA, NVME_IOPOLICY_RR, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 507bc149046d..61af7ff1a9d6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1233,7 +1233,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO); /* If there is a reset/reinit ongoing, we shouldn't reset again. 
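The ioctl.c rework above drops the early "if (capable(CAP_SYS_ADMIN)) return true;" and instead funnels every rejected case to a single admin: exit, so the capability is consulted only after a request has failed the unprivileged rules. A cut-down userspace model of that control flow follows; capable() is stubbed, the command struct is invented, and only two of the original checks are kept for brevity.

#include <stdbool.h>
#include <stdio.h>

/* stand-in for capable(CAP_SYS_ADMIN) */
static bool is_admin;
static bool capable(void) { return is_admin; }

struct cmd {
	unsigned char opcode;
	bool is_write;
};

#define CMD_VENDOR_START 0xc0	/* illustrative opcode boundary */

static bool cmd_allowed(const struct cmd *c, bool open_for_write)
{
	/* unprivileged callers may not send vendor-specific commands */
	if (c->opcode >= CMD_VENDOR_START)
		goto admin;

	/* data-modifying commands need a descriptor opened for writing */
	if (c->is_write && !open_for_write)
		goto admin;

	return true;
admin:
	/* one fallback: privilege may override what the checks rejected */
	return capable();
}

int main(void)
{
	struct cmd vendor = { .opcode = 0xc1, .is_write = false };

	printf("unprivileged: %d\n", cmd_allowed(&vendor, false)); /* 0 */
	is_admin = true;
	printf("as admin:     %d\n", cmd_allowed(&vendor, false)); /* 1 */
	return 0;
}

Centralizing the fallback means a privileged caller still exercises the same classification logic, and a new restriction only ever needs one "goto admin" rather than a matching capability special case.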
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 507bc149046d..61af7ff1a9d6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1233,7 +1233,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
 
 	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
-	switch (dev->ctrl.state) {
+	switch (nvme_ctrl_state(&dev->ctrl)) {
 	case NVME_CTRL_RESETTING:
 	case NVME_CTRL_CONNECTING:
 		return false;
@@ -1321,7 +1321,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 	 * cancellation error. All outstanding requests are completed on
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
-	switch (dev->ctrl.state) {
+	switch (nvme_ctrl_state(&dev->ctrl)) {
 	case NVME_CTRL_CONNECTING:
 		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 		fallthrough;
@@ -1593,7 +1593,7 @@ static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
 	/*
 	 * Controller is in wrong state, fail early.
	 */
-	if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
+	if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_CONNECTING) {
 		mutex_unlock(&dev->shutdown_lock);
 		return -ENODEV;
 	}
@@ -2573,13 +2573,13 @@ static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
+	enum nvme_ctrl_state state = nvme_ctrl_state(&dev->ctrl);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	bool dead;
 
 	mutex_lock(&dev->shutdown_lock);
 	dead = nvme_pci_ctrl_is_dead(dev);
-	if (dev->ctrl.state == NVME_CTRL_LIVE ||
-	    dev->ctrl.state == NVME_CTRL_RESETTING) {
+	if (state == NVME_CTRL_LIVE || state == NVME_CTRL_RESETTING) {
 		if (pci_is_enabled(pdev))
 			nvme_start_freeze(&dev->ctrl);
 		/*
@@ -2690,7 +2690,7 @@ static void nvme_reset_work(struct work_struct *work)
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result;
 
-	if (dev->ctrl.state != NVME_CTRL_RESETTING) {
+	if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_RESETTING) {
 		dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
 			 dev->ctrl.state);
 		result = -ENODEV;
@@ -2902,6 +2902,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
 		if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) &&
 		     dmi_match(DMI_BOARD_NAME, "LNVNB161216"))
 			return NVME_QUIRK_SIMPLE_SUSPEND;
+	} else if (pdev->vendor == 0x2646 && (pdev->device == 0x2263 ||
+		   pdev->device == 0x500f)) {
+		/*
+		 * Exclude some Kingston NV1 and A2000 devices from
+		 * NVME_QUIRK_SIMPLE_SUSPEND. Do a full suspend to save a
+		 * lot fo energy with s2idle sleep on some TUXEDO platforms.
+		 */
+		if (dmi_match(DMI_BOARD_NAME, "NS5X_NS7XAU") ||
+		    dmi_match(DMI_BOARD_NAME, "NS5x_7xAU") ||
+		    dmi_match(DMI_BOARD_NAME, "NS5x_7xPU") ||
+		    dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1"))
+			return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND;
 	}
 
 	return 0;
@@ -2932,7 +2944,9 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 	dev->dev = get_device(&pdev->dev);
 
 	quirks |= check_vendor_combination_bug(pdev);
-	if (!noacpi && acpi_storage_d3(&pdev->dev)) {
+	if (!noacpi &&
+	    !(quirks & NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND) &&
+	    acpi_storage_d3(&pdev->dev)) {
 		/*
		 * Some systems use a bios work around to ask for D3 on
		 * platforms that support kernel managed suspend.
@@ -3192,7 +3206,7 @@ static int nvme_suspend(struct device *dev)
 	nvme_wait_freeze(ctrl);
 	nvme_sync_queues(ctrl);
 
-	if (ctrl->state != NVME_CTRL_LIVE)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
 		goto unfreeze;
 
 	/*
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index a7fea4cbacd7..81e2621169e5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -984,10 +984,11 @@ free_ctrl:
 
 static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 {
+	enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
 	/* If we are resetting/deleting then do nothing */
-	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
-		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
-			ctrl->ctrl.state == NVME_CTRL_LIVE);
+	if (state != NVME_CTRL_CONNECTING) {
+		WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
 		return;
 	}
 
@@ -1059,8 +1060,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 		 * unless we're during creation of a new controller to
 		 * avoid races with teardown flow.
		 */
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
-			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		WARN_ON_ONCE(new);
 		ret = -EINVAL;
 		goto destroy_io;
@@ -1080,6 +1083,7 @@ destroy_io:
 		nvme_rdma_free_io_queues(ctrl);
 	}
 destroy_admin:
+	nvme_stop_keep_alive(&ctrl->ctrl);
 	nvme_quiesce_admin_queue(&ctrl->ctrl);
 	blk_sync_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
@@ -1128,8 +1132,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
-			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		return;
 	}
 
@@ -1161,7 +1167,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
 	struct nvme_rdma_queue *queue = wc->qp->qp_context;
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 
-	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+	if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
 		dev_info(ctrl->ctrl.device,
 			     "%s for CQE 0x%p failed with status %s (%d)\n",
 			     op, wc->wr_cqe,
@@ -1944,7 +1950,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
 	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
 		 rq->tag, nvme_rdma_queue_idx(queue));
 
-	if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
 		/*
		 * If we are resetting, connecting or deleting we should
		 * complete immediately because we may block controller
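The recurring pattern in the core.c, pci.c and rdma.c hunks is to snapshot the controller state once through the new nvme_ctrl_state() helper (a READ_ONCE() of ctrl->state, added in nvme.h above) and then test the local copy, instead of re-reading a field that a concurrent nvme_change_ctrl_state() may rewrite between tests. The userspace model below uses C11 relaxed atomics as a stand-in for the kernel's READ_ONCE/WRITE_ONCE single-copy guarantee; the struct and surrounding code are trimmed to the pattern itself and are not the driver's real layout.

#include <stdatomic.h>
#include <stdio.h>

enum nvme_ctrl_state { NVME_CTRL_NEW, NVME_CTRL_LIVE, NVME_CTRL_RESETTING };

struct nvme_ctrl {
	_Atomic enum nvme_ctrl_state state;
};

/* lockless read: callers always see one coherent value, never a torn one */
static enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
{
	return atomic_load_explicit(&ctrl->state, memory_order_relaxed);
}

static void nvme_set_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new)
{
	/* in the kernel the write additionally happens under ctrl->lock */
	atomic_store_explicit(&ctrl->state, new, memory_order_relaxed);
}

int main(void)
{
	struct nvme_ctrl ctrl = { .state = NVME_CTRL_NEW };

	nvme_set_state(&ctrl, NVME_CTRL_LIVE);

	/*
	 * Snapshot once and compare the copy, as the patched call sites do;
	 * testing ctrl.state twice could observe two different states if a
	 * reset worker changed it in between.
	 */
	enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl);
	if (state != NVME_CTRL_RESETTING && state != NVME_CTRL_NEW)
		printf("controller is live\n");
	return 0;
}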
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 89661a9cf850..08805f027810 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -36,11 +36,11 @@ static int so_priority;
 module_param(so_priority, int, 0644);
 MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
 
-#ifdef CONFIG_NVME_TCP_TLS
 /*
  * TLS handshake timeout
 */
 static int tls_handshake_timeout = 10;
+#ifdef CONFIG_NVME_TCP_TLS
 module_param(tls_handshake_timeout, int, 0644);
 MODULE_PARM_DESC(tls_handshake_timeout,
		 "nvme TLS handshake timeout in seconds (default 10)");
@@ -161,10 +161,8 @@ struct nvme_tcp_queue {
 	struct ahash_request	*snd_hash;
 	__le32			exp_ddgst;
 	__le32			recv_ddgst;
-#ifdef CONFIG_NVME_TCP_TLS
 	struct completion       tls_complete;
 	int                     tls_err;
-#endif
 	struct page_frag_cache	pf_cache;
 
 	void (*state_change)(struct sock *);
@@ -207,6 +205,14 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
 	return queue - queue->ctrl->queues;
 }
 
+static inline bool nvme_tcp_tls(struct nvme_ctrl *ctrl)
+{
+	if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
+		return 0;
+
+	return ctrl->opts->tls;
+}
+
 static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
 {
 	u32 queue_idx = nvme_tcp_queue_id(queue);
@@ -1412,7 +1418,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
 	memset(&msg, 0, sizeof(msg));
 	iov.iov_base = icresp;
 	iov.iov_len = sizeof(*icresp);
-	if (queue->ctrl->ctrl.opts->tls) {
+	if (nvme_tcp_tls(&queue->ctrl->ctrl)) {
 		msg.msg_control = cbuf;
 		msg.msg_controllen = sizeof(cbuf);
 	}
@@ -1424,7 +1430,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
 		goto free_icresp;
 	}
 	ret = -ENOTCONN;
-	if (queue->ctrl->ctrl.opts->tls) {
+	if (nvme_tcp_tls(&queue->ctrl->ctrl)) {
 		ctype = tls_get_record_type(queue->sock->sk,
 					    (struct cmsghdr *)cbuf);
 		if (ctype != TLS_RECORD_TYPE_DATA) {
@@ -1548,7 +1554,6 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
 	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
 }
 
-#ifdef CONFIG_NVME_TCP_TLS
 static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
 {
 	struct nvme_tcp_queue *queue = data;
@@ -1625,14 +1630,6 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
 	}
 	return ret;
 }
-#else
-static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
-			      struct nvme_tcp_queue *queue,
-			      key_serial_t pskid)
-{
-	return -EPROTONOSUPPORT;
-}
-#endif
 
 static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 		key_serial_t pskid)
@@ -1759,7 +1756,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 	}
 
 	/* If PSKs are configured try to start TLS */
-	if (pskid) {
+	if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && pskid) {
 		ret = nvme_tcp_start_tls(nctrl, queue, pskid);
 		if (ret)
 			goto err_init_connect;
@@ -1916,7 +1913,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
 	int ret;
 	key_serial_t pskid = 0;
 
-	if (ctrl->opts->tls) {
+	if (nvme_tcp_tls(ctrl)) {
 		if (ctrl->opts->tls_key)
 			pskid = key_serial(ctrl->opts->tls_key);
 		else
@@ -1949,7 +1946,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	int i, ret;
 
-	if (ctrl->opts->tls && !ctrl->tls_key) {
+	if (nvme_tcp_tls(ctrl) && !ctrl->tls_key) {
 		dev_err(ctrl->device, "no PSK negotiated\n");
 		return -ENOKEY;
 	}
@@ -2155,10 +2152,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 
 static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
 {
+	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
 	/* If we are resetting/deleting then do nothing */
-	if (ctrl->state != NVME_CTRL_CONNECTING) {
-		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
-			ctrl->state == NVME_CTRL_LIVE);
+	if (state != NVME_CTRL_CONNECTING) {
+		WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
 		return;
 	}
 
@@ -2218,8 +2216,10 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
 		 * unless we're during creation of a new controller to
 		 * avoid races with teardown flow.
		 */
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
-			     ctrl->state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		WARN_ON_ONCE(new);
 		ret = -EINVAL;
 		goto destroy_io;
@@ -2237,6 +2237,7 @@ destroy_io:
 		nvme_tcp_destroy_io_queues(ctrl, new);
 	}
 destroy_admin:
+	nvme_stop_keep_alive(ctrl);
 	nvme_tcp_teardown_admin_queue(ctrl, false);
 	return ret;
 }
@@ -2282,8 +2283,10 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
-			     ctrl->state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		return;
 	}
 
@@ -2313,8 +2316,10 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
-			     ctrl->state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		return;
 	}
 
@@ -2432,7 +2437,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 		nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
 		opc, nvme_opcode_str(qid, opc, fctype));
 
-	if (ctrl->state != NVME_CTRL_LIVE) {
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
 		/*
		 * If we are resetting, connecting or deleting we should
		 * complete immediately because we may block controller