| author | Dmitry Torokhov <[email protected]> | 2023-08-30 16:06:38 -0700 | 
|---|---|---|
| committer | Dmitry Torokhov <[email protected]> | 2023-08-30 16:06:38 -0700 | 
| commit | 1ac731c529cd4d6adbce134754b51ff7d822b145 (patch) | |
| tree | 143ab3f35ca5f3b69f583c84e6964b17139c2ec1 /drivers/nvme/host/core.c | |
| parent | 07b4c950f27bef0362dc6ad7ee713aab61d58149 (diff) | |
| parent | 54116d442e001e1b6bd482122043b1870998a1f3 (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 6.6 merge window.
Diffstat (limited to 'drivers/nvme/host/core.c')
| -rw-r--r-- | drivers/nvme/host/core.c | 108 | 
1 file changed, 82 insertions, 26 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c2730b116dc6..3ec38e2b9173 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -397,7 +397,16 @@ void nvme_complete_rq(struct request *req)
 	trace_nvme_complete_rq(req);
 	nvme_cleanup_cmd(req);
 
-	if (ctrl->kas)
+	/*
+	 * Completions of long-running commands should not be able to
+	 * defer sending of periodic keep alives, since the controller
+	 * may have completed processing such commands a long time ago
+	 * (arbitrarily close to command submission time).
+	 * req->deadline - req->timeout is the command submission time
+	 * in jiffies.
+	 */
+	if (ctrl->kas &&
+	    req->deadline - req->timeout >= ctrl->ka_last_check_time)
 		ctrl->comp_seen = true;
 
 	switch (nvme_decide_disposition(req)) {
@@ -450,8 +459,8 @@ bool nvme_cancel_request(struct request *req, void *data)
 	dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
 				"Cancelling I/O %d", req->tag);
 
-	/* don't abort one completed request */
-	if (blk_mq_request_completed(req))
+	/* don't abort one completed or idle request */
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
 		return true;
 
 	nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
@@ -781,16 +790,26 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		range = page_address(ns->ctrl->discard_page);
 	}
 
-	__rq_for_each_bio(bio, req) {
-		u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
-		u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+	if (queue_max_discard_segments(req->q) == 1) {
+		u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
+		u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
 
-		if (n < segments) {
-			range[n].cattr = cpu_to_le32(0);
-			range[n].nlb = cpu_to_le32(nlb);
-			range[n].slba = cpu_to_le64(slba);
+		range[0].cattr = cpu_to_le32(0);
+		range[0].nlb = cpu_to_le32(nlb);
+		range[0].slba = cpu_to_le64(slba);
+		n = 1;
+	} else {
+		__rq_for_each_bio(bio, req) {
+			u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
+			u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+
+			if (n < segments) {
+				range[n].cattr = cpu_to_le32(0);
+				range[n].nlb = cpu_to_le32(nlb);
+				range[n].slba = cpu_to_le64(slba);
+			}
+			n++;
 		}
-		n++;
 	}
 
 	if (WARN_ON_ONCE(n != segments)) {
@@ -1105,7 +1124,7 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
 }
 EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);
 
-void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
 		       struct nvme_command *cmd, int status)
 {
 	if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
@@ -1122,6 +1141,8 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
 		nvme_queue_scan(ctrl);
 		flush_work(&ctrl->scan_work);
 	}
+	if (ns)
+		return;
 
 	switch (cmd->common.opcode) {
 	case nvme_admin_set_features:
@@ -1151,9 +1172,25 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
  *   The host should send Keep Alive commands at half of the Keep Alive Timeout
  *   accounting for transport roundtrip times [..].
  */
+static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
+{
+	unsigned long delay = ctrl->kato * HZ / 2;
+
+	/*
+	 * When using Traffic Based Keep Alive, we need to run
+	 * nvme_keep_alive_work at twice the normal frequency, as one
+	 * command completion can postpone sending a keep alive command
+	 * by up to twice the delay between runs.
+	 */
+	if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS)
+		delay /= 2;
+	return delay;
+}
+
 static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 {
-	queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
+	queue_delayed_work(nvme_wq, &ctrl->ka_work,
+			   nvme_keep_alive_work_period(ctrl));
 }
 
 static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1162,6 +1199,20 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
 	struct nvme_ctrl *ctrl = rq->end_io_data;
 	unsigned long flags;
 	bool startka = false;
+	unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
+	unsigned long delay = nvme_keep_alive_work_period(ctrl);
+
+	/*
+	 * Subtract off the keepalive RTT so nvme_keep_alive_work runs
+	 * at the desired frequency.
+	 */
+	if (rtt <= delay) {
+		delay -= rtt;
+	} else {
+		dev_warn(ctrl->device, "long keepalive RTT (%u ms)\n",
+			 jiffies_to_msecs(rtt));
+		delay = 0;
+	}
 
 	blk_mq_free_request(rq);
 
@@ -1172,6 +1223,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
 		return RQ_END_IO_NONE;
 	}
 
+	ctrl->ka_last_check_time = jiffies;
 	ctrl->comp_seen = false;
 	spin_lock_irqsave(&ctrl->lock, flags);
 	if (ctrl->state == NVME_CTRL_LIVE ||
@@ -1179,7 +1231,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
 		startka = true;
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 	if (startka)
-		nvme_queue_keep_alive_work(ctrl);
+		queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 	return RQ_END_IO_NONE;
 }
 
@@ -1190,6 +1242,8 @@ static void nvme_keep_alive_work(struct work_struct *work)
 	bool comp_seen = ctrl->comp_seen;
 	struct request *rq;
 
+	ctrl->ka_last_check_time = jiffies;
+
 	if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
 		dev_dbg(ctrl->device,
 			"reschedule traffic based keep-alive timer\n");
@@ -1664,6 +1718,9 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 	struct request_queue *queue = disk->queue;
 	u32 size = queue_logical_block_size(queue);
 
+	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
+		ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+
 	if (ctrl->max_discard_sectors == 0) {
 		blk_queue_max_discard_sectors(queue, 0);
 		return;
@@ -1678,9 +1735,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 	if (queue->limits.max_discard_sectors)
 		return;
 
-	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
-		ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
-
 	blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
 	blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
 
@@ -3053,7 +3107,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
 	else
 		ctrl->max_zeroes_sectors = 0;
 
-	if (nvme_ctrl_limited_cns(ctrl))
+	if (ctrl->subsys->subtype != NVME_NQN_NVME ||
+	    nvme_ctrl_limited_cns(ctrl))
 		return 0;
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -3574,6 +3629,9 @@ static ssize_t nvme_sysfs_delete(struct device *dev,
 {
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
+	if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags))
+		return -EBUSY;
+
 	if (device_remove_file_self(dev, attr))
 		nvme_delete_ctrl_sync(ctrl);
 	return count;
@@ -4808,8 +4866,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 	u32 aer_notice_type = nvme_aer_subtype(result);
 	bool requeue = true;
 
-	trace_nvme_async_event(ctrl, aer_notice_type);
-
 	switch (aer_notice_type) {
 	case NVME_AER_NOTICE_NS_CHANGED:
 		set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
@@ -4845,7 +4901,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 
 static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
 {
-	trace_nvme_async_event(ctrl, NVME_AER_ERROR);
 	dev_warn(ctrl->device, "resetting controller due to AER\n");
 	nvme_reset_ctrl(ctrl);
 }
@@ -4861,6 +4916,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
 		return;
 
+	trace_nvme_async_event(ctrl, result);
 	switch (aer_type) {
 	case NVME_AER_NOTICE:
 		requeue = nvme_handle_aen_notice(ctrl, result);
@@ -4878,7 +4934,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	case NVME_AER_SMART:
 	case NVME_AER_CSS:
 	case NVME_AER_VS:
-		trace_nvme_async_event(ctrl, aer_type);
 		ctrl->aen_result = result;
 		break;
 	default:
@@ -5037,7 +5092,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
 	 * that were missed. We identify persistent discovery controllers by
 	 * checking that they started once before, hence are reconnecting back.
 	 */
-	if (test_and_set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
+	if (test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
 	    nvme_discovery_ctrl(ctrl))
 		nvme_change_uevent(ctrl, "NVME_EVENT=rediscover");
 
@@ -5048,6 +5103,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
 	}
 
 	nvme_change_uevent(ctrl, "NVME_EVENT=connected");
+	set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags);
 }
 EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
@@ -5381,14 +5437,14 @@ static int __init nvme_core_init(void)
 	if (result < 0)
 		goto destroy_delete_wq;
 
-	nvme_class = class_create(THIS_MODULE, "nvme");
+	nvme_class = class_create("nvme");
 	if (IS_ERR(nvme_class)) {
 		result = PTR_ERR(nvme_class);
 		goto unregister_chrdev;
 	}
 	nvme_class->dev_uevent = nvme_class_uevent;
 
-	nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
+	nvme_subsys_class = class_create("nvme-subsystem");
 	if (IS_ERR(nvme_subsys_class)) {
 		result = PTR_ERR(nvme_subsys_class);
 		goto destroy_class;
@@ -5399,7 +5455,7 @@ static int __init nvme_core_init(void)
 	if (result < 0)
 		goto destroy_subsys_class;
 
-	nvme_ns_chr_class = class_create(THIS_MODULE, "nvme-generic");
+	nvme_ns_chr_class = class_create("nvme-generic");
 	if (IS_ERR(nvme_ns_chr_class)) {
 		result = PTR_ERR(nvme_ns_chr_class);
 		goto unregister_generic_ns;
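The keep-alive hunks above boil down to two pieces of jiffies arithmetic: the work period is half of the Keep Alive Timeout, halved again when Traffic Based Keep Alive is enabled, and the measured round-trip time of the previous keep-alive command is subtracted from the next queueing delay (clamped at zero). The sketch below replays that arithmetic as standalone userspace C; it is an illustration only, with plain milliseconds standing in for jiffies and made-up inputs (kato_secs, tbkas, rtt_ms) in place of ctrl->kato, the NVME_CTRL_ATTR_TBKAS bit, and the observed keep-alive completion timing.

```c
/*
 * Standalone sketch of the keep-alive scheduling arithmetic from the
 * diff above: period = kato/2, halved again under Traffic Based Keep
 * Alive, with the keep-alive round-trip time subtracted from the next
 * delay so the effective cadence holds. All inputs are hypothetical;
 * milliseconds stand in for jiffies.
 */
#include <stdio.h>

static unsigned long keep_alive_work_period_ms(unsigned long kato_secs, int tbkas)
{
	unsigned long delay = kato_secs * 1000 / 2;

	if (tbkas)		/* run twice as often under TBKAS */
		delay /= 2;
	return delay;
}

static unsigned long next_delay_ms(unsigned long period_ms, unsigned long rtt_ms)
{
	/* Subtract the keep-alive RTT; a very long RTT means "run now". */
	return rtt_ms <= period_ms ? period_ms - rtt_ms : 0;
}

int main(void)
{
	unsigned long kato_secs = 15;	/* hypothetical Keep Alive Timeout */

	printf("plain KA period: %lu ms\n",
	       keep_alive_work_period_ms(kato_secs, 0));	/* 7500 */
	printf("TBKAS KA period: %lu ms\n",
	       keep_alive_work_period_ms(kato_secs, 1));	/* 3750 */
	printf("next delay after 200 ms RTT: %lu ms\n",
	       next_delay_ms(keep_alive_work_period_ms(kato_secs, 1), 200));
	return 0;
}
```

Under TBKAS the halved period compensates for the worst case in which a command completion observed just after one work run postpones the keep-alive decision until the following run.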