Diffstat (limited to 'drivers/nvme/host/core.c')
 drivers/nvme/host/core.c | 108
 1 file changed, 82 insertions(+), 26 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c2730b116dc6..3ec38e2b9173 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -397,7 +397,16 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);
nvme_cleanup_cmd(req);
- if (ctrl->kas)
+ /*
+ * Completions of long-running commands should not be able to
+ * defer sending of periodic keep alives, since the controller
+ * may have completed processing such commands a long time ago
+ * (arbitrarily close to command submission time).
+ * req->deadline - req->timeout is the command submission time
+ * in jiffies.
+ */
+ if (ctrl->kas &&
+ req->deadline - req->timeout >= ctrl->ka_last_check_time)
ctrl->comp_seen = true;
switch (nvme_decide_disposition(req)) {
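For illustration, a minimal userspace sketch of the new traffic check, assuming jiffies-style tick counters; the struct and harness are hypothetical, only the arithmetic mirrors the hunk:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's jiffies-based request fields. */
struct fake_req {
        unsigned long deadline;  /* submission time + timeout */
        unsigned long timeout;
};

/* A completion only counts as keep-alive traffic if the command was
 * submitted after the last keep-alive check, proving the controller
 * was alive within the current keep-alive window. */
static bool counts_as_traffic(const struct fake_req *req,
                              unsigned long ka_last_check_time)
{
        return req->deadline - req->timeout >= ka_last_check_time;
}

int main(void)
{
        struct fake_req old_cmd = { .deadline = 1000, .timeout = 900 };

        /* Submitted at tick 100, last check at tick 500: too old, prints 0. */
        printf("%d\n", counts_as_traffic(&old_cmd, 500));
        return 0;
}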
@@ -450,8 +459,8 @@ bool nvme_cancel_request(struct request *req, void *data)
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
"Cancelling I/O %d", req->tag);
- /* don't abort one completed request */
- if (blk_mq_request_completed(req))
+ /* don't abort one completed or idle request */
+ if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
return true;
nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
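For reference, the blk-mq request states the new check distinguishes (values as in the kernel's include/linux/blk-mq.h); only requests actually issued to the driver are worth cancelling:

/* blk-mq request lifecycle; blk_mq_rq_state() returns one of these.
 * MQ_RQ_IDLE requests were never started and MQ_RQ_COMPLETE ones are
 * already finishing, so neither should be force-failed here. */
enum mq_rq_state {
        MQ_RQ_IDLE      = 0,
        MQ_RQ_IN_FLIGHT = 1,
        MQ_RQ_COMPLETE  = 2,
};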
@@ -781,16 +790,26 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
range = page_address(ns->ctrl->discard_page);
}
- __rq_for_each_bio(bio, req) {
- u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
- u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+ if (queue_max_discard_segments(req->q) == 1) {
+ u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
+ u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
- if (n < segments) {
- range[n].cattr = cpu_to_le32(0);
- range[n].nlb = cpu_to_le32(nlb);
- range[n].slba = cpu_to_le64(slba);
+ range[0].cattr = cpu_to_le32(0);
+ range[0].nlb = cpu_to_le32(nlb);
+ range[0].slba = cpu_to_le64(slba);
+ n = 1;
+ } else {
+ __rq_for_each_bio(bio, req) {
+ u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
+ u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+
+ if (n < segments) {
+ range[n].cattr = cpu_to_le32(0);
+ range[n].nlb = cpu_to_le32(nlb);
+ range[n].slba = cpu_to_le64(slba);
+ }
+ n++;
}
- n++;
}
if (WARN_ON_ONCE(n != segments)) {
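A small standalone sketch of the sector-to-LBA conversion used above, assuming 512-byte block-layer sectors; nvme_sect_to_lba() shifts by (lba_shift - 9), and the single-segment path converts the sector count the same way. The harness is hypothetical:

#include <stdint.h>
#include <stdio.h>

/* 512-byte sectors to namespace LBAs; lba_shift is log2 of the LBA
 * size, e.g. 12 for 4 KiB logical blocks. */
static uint64_t sect_to_lba(uint64_t sectors, unsigned int lba_shift)
{
        return sectors >> (lba_shift - 9);
}

int main(void)
{
        /* A discard at sector 2048, 256 sectors long, on a 4 KiB-LBA
         * namespace: slba = 256, nlb = 32. */
        printf("slba=%llu nlb=%llu\n",
               (unsigned long long)sect_to_lba(2048, 12),
               (unsigned long long)sect_to_lba(256, 12));
        return 0;
}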
@@ -1105,7 +1124,7 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
}
EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);
-void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
struct nvme_command *cmd, int status)
{
if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
@@ -1122,6 +1141,8 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
}
+ if (ns)
+ return;
switch (cmd->common.opcode) {
case nvme_admin_set_features:
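The early return added above reflects that the opcode switch which follows only post-processes admin commands; a hedged sketch of the control flow, with hypothetical names:

#include <stddef.h>

struct pt_ns;  /* hypothetical namespace handle */

/* Passthrough commands issued against a namespace are I/O commands;
 * only admin commands (ns == NULL) can change controller state that
 * needs re-reading afterwards. */
static int admin_postprocessing_needed(const struct pt_ns *ns)
{
        return ns == NULL;
}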
@@ -1151,9 +1172,25 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
* The host should send Keep Alive commands at half of the Keep Alive Timeout
* accounting for transport roundtrip times [..].
*/
+static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
+{
+ unsigned long delay = ctrl->kato * HZ / 2;
+
+ /*
+ * When using Traffic Based Keep Alive, we need to run
+ * nvme_keep_alive_work at twice the normal frequency, as one
+ * command completion can postpone sending a keep alive command
+ * by up to twice the delay between runs.
+ */
+ if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS)
+ delay /= 2;
+ return delay;
+}
+
static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
{
- queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work,
+ nvme_keep_alive_work_period(ctrl));
}
static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
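A worked example of the period computation, under the assumption HZ = 1000; with KATO = 10 s the work item runs every 5 s, or every 2.5 s when the controller advertises Traffic Based Keep Alive:

#include <stdio.h>

#define HZ 1000  /* assumed tick rate for this sketch */

/* Mirrors nvme_keep_alive_work_period(): half of KATO, halved again
 * under TBKAS because one command completion may defer the keep
 * alive by a full work period. */
static unsigned long ka_work_period(unsigned int kato_secs, int tbkas)
{
        unsigned long delay = kato_secs * HZ / 2;

        if (tbkas)
                delay /= 2;
        return delay;
}

int main(void)
{
        /* Prints "5000 2500". */
        printf("%lu %lu\n", ka_work_period(10, 0), ka_work_period(10, 1));
        return 0;
}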
@@ -1162,6 +1199,20 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
struct nvme_ctrl *ctrl = rq->end_io_data;
unsigned long flags;
bool startka = false;
+ unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
+ unsigned long delay = nvme_keep_alive_work_period(ctrl);

+
+ /*
+ * Subtract off the keepalive RTT so nvme_keep_alive_work runs
+ * at the desired frequency.
+ */
+ if (rtt <= delay) {
+ delay -= rtt;
+ } else {
+ dev_warn(ctrl->device, "long keepalive RTT (%u ms)\n",
+ jiffies_to_msecs(rtt));
+ delay = 0;
+ }
blk_mq_free_request(rq);
@@ -1172,6 +1223,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
return RQ_END_IO_NONE;
}
+ ctrl->ka_last_check_time = jiffies;
ctrl->comp_seen = false;
spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->state == NVME_CTRL_LIVE ||
@@ -1179,7 +1231,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- nvme_queue_keep_alive_work(ctrl);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
return RQ_END_IO_NONE;
}
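A standalone sketch of the rescheduling arithmetic above: the keep-alive round-trip time is deducted from the next delay so the cadence stays fixed, and an RTT longer than one period requeues immediately. Names are illustrative:

#include <stdio.h>

static unsigned long next_ka_delay(unsigned long now,
                                   unsigned long submitted,
                                   unsigned long period)
{
        unsigned long rtt = now - submitted;

        /* e.g. a 2500-tick period and a 300-tick RTT requeue in 2200. */
        return rtt <= period ? period - rtt : 0;
}

int main(void)
{
        printf("%lu\n", next_ka_delay(10300, 10000, 2500));
        return 0;
}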
@@ -1190,6 +1242,8 @@ static void nvme_keep_alive_work(struct work_struct *work)
bool comp_seen = ctrl->comp_seen;
struct request *rq;
+ ctrl->ka_last_check_time = jiffies;
+
if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
@@ -1664,6 +1718,9 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
struct request_queue *queue = disk->queue;
u32 size = queue_logical_block_size(queue);
+ if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
+ ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+
if (ctrl->max_discard_sectors == 0) {
blk_queue_max_discard_sectors(queue, 0);
return;
@@ -1678,9 +1735,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
if (queue->limits.max_discard_sectors)
return;
- if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
- ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
-
blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
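DMRSL is reported by the controller in logical blocks; the moved lines apply that clamp before the early returns above so it is not skipped. A sketch of the unit conversion, with hypothetical names:

#include <stdint.h>
#include <stdio.h>

/* Namespace LBAs to 512-byte sectors, mirroring nvme_lba_to_sect(). */
static uint64_t lba_to_sect(uint64_t lba, unsigned int lba_shift)
{
        return lba << (lba_shift - 9);
}

int main(void)
{
        /* A DMRSL of 0x10000 logical blocks on a 4 KiB-LBA namespace
         * caps discards at 524288 block-layer sectors (256 MiB). */
        printf("%llu\n", (unsigned long long)lba_to_sect(0x10000, 12));
        return 0;
}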
@@ -3053,7 +3107,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
else
ctrl->max_zeroes_sectors = 0;
- if (nvme_ctrl_limited_cns(ctrl))
+ if (ctrl->subsys->subtype != NVME_NQN_NVME ||
+ nvme_ctrl_limited_cns(ctrl))
return 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -3574,6 +3629,9 @@ static ssize_t nvme_sysfs_delete(struct device *dev,
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags))
+ return -EBUSY;
+
if (device_remove_file_self(dev, attr))
nvme_delete_ctrl_sync(ctrl);
return count;
@@ -4808,8 +4866,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
u32 aer_notice_type = nvme_aer_subtype(result);
bool requeue = true;
- trace_nvme_async_event(ctrl, aer_notice_type);
-
switch (aer_notice_type) {
case NVME_AER_NOTICE_NS_CHANGED:
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
@@ -4845,7 +4901,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
{
- trace_nvme_async_event(ctrl, NVME_AER_ERROR);
dev_warn(ctrl->device, "resetting controller due to AER\n");
nvme_reset_ctrl(ctrl);
}
@@ -4861,6 +4916,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
return;
+ trace_nvme_async_event(ctrl, result);
switch (aer_type) {
case NVME_AER_NOTICE:
requeue = nvme_handle_aen_notice(ctrl, result);
@@ -4878,7 +4934,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
case NVME_AER_SMART:
case NVME_AER_CSS:
case NVME_AER_VS:
- trace_nvme_async_event(ctrl, aer_type);
ctrl->aen_result = result;
break;
default:
@@ -5037,7 +5092,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
* that were missed. We identify persistent discovery controllers by
* checking that they started once before, hence are reconnecting back.
*/
- if (test_and_set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
+ if (test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
nvme_discovery_ctrl(ctrl))
nvme_change_uevent(ctrl, "NVME_EVENT=rediscover");
@@ -5048,6 +5103,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
}
nvme_change_uevent(ctrl, "NVME_EVENT=connected");
+ set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags);
}
EXPORT_SYMBOL_GPL(nvme_start_ctrl);
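The atomic test_and_set_bit() is split into test_bit() plus a set_bit() at the end of start-up, so NVME_CTRL_STARTED_ONCE doubles as the gate that makes nvme_sysfs_delete() return -EBUSY above. A userspace sketch of the pattern, with illustrative names:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool started_once;

/* Deletion is refused until start-up has completed and published
 * the flag; mirrors the -EBUSY check in nvme_sysfs_delete(). */
static int try_delete(void)
{
        if (!atomic_load(&started_once))
                return -16; /* -EBUSY */
        /* ... tear the controller down ... */
        return 0;
}

static void start_ctrl(void)
{
        /* ... bring the controller up, emit uevents ... */
        atomic_store(&started_once, true);
}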
@@ -5381,14 +5437,14 @@ static int __init nvme_core_init(void)
if (result < 0)
goto destroy_delete_wq;
- nvme_class = class_create(THIS_MODULE, "nvme");
+ nvme_class = class_create("nvme");
if (IS_ERR(nvme_class)) {
result = PTR_ERR(nvme_class);
goto unregister_chrdev;
}
nvme_class->dev_uevent = nvme_class_uevent;
- nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
+ nvme_subsys_class = class_create("nvme-subsystem");
if (IS_ERR(nvme_subsys_class)) {
result = PTR_ERR(nvme_subsys_class);
goto destroy_class;
@@ -5399,7 +5455,7 @@ static int __init nvme_core_init(void)
if (result < 0)
goto destroy_subsys_class;
- nvme_ns_chr_class = class_create(THIS_MODULE, "nvme-generic");
+ nvme_ns_chr_class = class_create("nvme-generic");
if (IS_ERR(nvme_ns_chr_class)) {
result = PTR_ERR(nvme_ns_chr_class);
goto unregister_generic_ns;
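For context, class_create() dropped its module-owner argument (the v6.4-era API change this hunk adapts to); a minimal module sketch using the updated signature, with illustrative names:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/module.h>

static struct class *example_class;

static int __init example_init(void)
{
        /* One argument now: the class name. */
        example_class = class_create("example");
        if (IS_ERR(example_class))
                return PTR_ERR(example_class);
        return 0;
}

static void __exit example_exit(void)
{
        class_destroy(example_class);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");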