From 4cff280a5fccf6513ed9e895bb3a4e7ad8b0cedc Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Wed, 14 Nov 2018 16:35:10 -0800
Subject: nvme-fc: resolve io failures during connect

If an io error occurs on an io issued while connecting, recovery
of the io falls flat as the state checking ends up nooping the error
handler.

Create an err_work work item that is scheduled upon an io error while
connecting. The work thread terminates all io on all queues and marks
the queues as not connected.  The termination of the io will return
back to the callee, which will then back out of the connection attempt
and will reschedule, if possible, the connection attempt.

The changes:
- in case there are several commands hitting the error handler, a
  state flag is kept so that the error work is only scheduled once,
  on the first error. The subsequent errors can be ignored.
- The calling sequence to stop keep alive and terminate the queues
  and their io is lifted from the reset routine. Made a small
  service routine used by both reset and err_work.
- During debugging, found that the teardown path can reference
  an uninitialized pointer, resulting in a NULL pointer oops.
  The aen_ops weren't initialized yet. Add validation on their
  initialization before calling the teardown routine.

Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 73 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 10 deletions(-)

(limited to 'drivers/nvme/host/fc.c')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0b70c8bab045..54032c466636 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -152,6 +152,7 @@ struct nvme_fc_ctrl {
 
 	bool			ioq_live;
 	bool			assoc_active;
+	atomic_t		err_work_active;
 	u64			association_id;
 
 	struct list_head	ctrl_list;	/* rport->ctrl_list */
@@ -160,6 +161,7 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	tag_set;
 
 	struct delayed_work	connect_work;
+	struct work_struct	err_work;
 
 	struct kref		ref;
 	u32			flags;
@@ -1531,6 +1533,10 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
 	int i;
 
+	/* ensure we've initialized the ops once */
+	if (!(aen_op->flags & FCOP_FLAGS_AEN))
+		return;
+
 	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
 		__nvme_fc_abort_op(ctrl, aen_op);
 }
@@ -2049,7 +2055,25 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
 static void
 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 {
-	/* only proceed if in LIVE state - e.g. on first error */
+	int active;
+
+	/*
+	 * if an error (io timeout, etc) while (re)connecting,
+	 * it's an error on creating the new association.
+	 * Start the error recovery thread if it hasn't already
+	 * been started. It is expected there could be multiple
+	 * ios hitting this path before things are cleaned up.
+	 */
+	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
+		active = atomic_xchg(&ctrl->err_work_active, 1);
+		if (!active && !schedule_work(&ctrl->err_work)) {
+			atomic_set(&ctrl->err_work_active, 0);
+			WARN_ON(1);
+		}
+		return;
+	}
+
+	/* Otherwise, only proceed if in LIVE state - e.g. on first error */
 	if (ctrl->ctrl.state != NVME_CTRL_LIVE)
 		return;
 
@@ -2814,6 +2838,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 
+	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 	/*
 	 * kill the association on the link side.  this will block
@@ -2866,23 +2891,30 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 }
 
 static void
-nvme_fc_reset_ctrl_work(struct work_struct *work)
+__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_ctrl *ctrl =
-		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
-	int ret;
-
-	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_stop_keep_alive(&ctrl->ctrl);
 
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
+	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: error_recovery: Couldn't change state "
 			"to CONNECTING\n", ctrl->cnum);
-		return;
-	}
+}
+
+static void
+nvme_fc_reset_ctrl_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
+	int ret;
+
+	__nvme_fc_terminate_io(ctrl);
+
+	nvme_stop_ctrl(&ctrl->ctrl);
 
 	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
 		ret = nvme_fc_create_association(ctrl);
@@ -2897,6 +2929,24 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 			ctrl->cnum);
 }
 
+static void
+nvme_fc_connect_err_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+			container_of(work, struct nvme_fc_ctrl, err_work);
+
+	__nvme_fc_terminate_io(ctrl);
+
+	atomic_set(&ctrl->err_work_active, 0);
+
+	/*
+	 * Rescheduling the connection after recovering
+	 * from the io error is left to the reconnect work
+	 * item, which is what should have stalled waiting on
+	 * the io that had the error that scheduled this work.
+	 */
+}
+
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.name			= "fc",
 	.module			= THIS_MODULE,
@@ -3007,6 +3057,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->cnum = idx;
 	ctrl->ioq_live = false;
 	ctrl->assoc_active = false;
+	atomic_set(&ctrl->err_work_active, 0);
 	init_waitqueue_head(&ctrl->ioabort_wait);
 
 	get_device(ctrl->dev);
@@ -3014,6 +3065,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+	INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work);
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
@@ -3103,6 +3155,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 fail_ctrl:
 	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
 	cancel_work_sync(&ctrl->ctrl.reset_work);
+	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 
 	ctrl->ctrl.opts = NULL;
-- 
cgit 


From dfa74422d604abc2e16763db12646583219806e4 Mon Sep 17 00:00:00 2001
From: "Ewan D. Milne" <emilne@redhat.com>
Date: Mon, 26 Nov 2018 12:01:30 -0500
Subject: nvme-fc: initialize nvme_req(rq)->ctrl after calling
 __nvme_fc_init_request()

__nvme_fc_init_request() invokes memset() on the nvme_fcp_op_w_sgl structure, which
NULLed-out the nvme_req(req)->ctrl field previously set by nvme_fc_init_request().
This apparently was not referenced until commit faf4a44fff ("nvme: support traffic
based keep-alive") which now results in a crash in nvme_complete_rq():

[ 8386.897130] RIP: 0010:panic+0x220/0x26c
[ 8386.901406] Code: 83 3d 6f ee 72 01 00 74 05 e8 e8 54 02 00 48 c7 c6 40 fd 5b b4 48 c7 c7 d8 8d c6 b3 31e
[ 8386.922359] RSP: 0018:ffff99650019fc40 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
[ 8386.930804] RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000006
[ 8386.938764] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff8e325f8168b0
[ 8386.946725] RBP: ffff99650019fcb0 R08: 0000000000000000 R09: 00000000000004f8
[ 8386.954687] R10: 0000000000000000 R11: ffff99650019f9b8 R12: ffffffffb3c55f3c
[ 8386.962648] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001
[ 8386.970613]  oops_end+0xd1/0xe0
[ 8386.974116]  no_context+0x1b2/0x3c0
[ 8386.978006]  do_page_fault+0x32/0x140
[ 8386.982090]  page_fault+0x1e/0x30
[ 8386.985786] RIP: 0010:nvme_complete_rq+0x65/0x1d0 [nvme_core]
[ 8386.992195] Code: 41 bc 03 00 00 00 74 16 0f 86 c3 00 00 00 66 3d 83 00 41 bc 06 00 00 00 0f 85 e7 00 000
[ 8387.013147] RSP: 0018:ffff99650019fe18 EFLAGS: 00010246
[ 8387.018973] RAX: 0000000000000000 RBX: ffff8e322ae51280 RCX: 0000000000000001
[ 8387.026935] RDX: 0000000000000400 RSI: 0000000000000001 RDI: ffff8e322ae51280
[ 8387.034897] RBP: ffff8e322ae51280 R08: 0000000000000000 R09: ffffffffb2f0b890
[ 8387.042859] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
[ 8387.050821] R13: 0000000000000100 R14: 0000000000000004 R15: ffff8e2b0446d990
[ 8387.058782]  ? swiotlb_unmap_page+0x40/0x40
[ 8387.063448]  nvme_fc_complete_rq+0x2d/0x70 [nvme_fc]
[ 8387.068986]  blk_done_softirq+0xa1/0xd0
[ 8387.073264]  __do_softirq+0xd6/0x2a9
[ 8387.077251]  run_ksoftirqd+0x26/0x40
[ 8387.081238]  smpboot_thread_fn+0x10e/0x160
[ 8387.085807]  kthread+0xf8/0x130
[ 8387.089309]  ? sort_range+0x20/0x20
[ 8387.093198]  ? kthread_stop+0x110/0x110
[ 8387.097475]  ret_from_fork+0x35/0x40
[ 8387.101462] ---[ end trace 7106b0adf5e422f8 ]---

Fixes: faf4a44fff ("nvme: support traffic based keep-alive")
Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme/host/fc.c')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 54032c466636..feb86b59170e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1752,12 +1752,12 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
 	int res;
 
-	nvme_req(rq)->ctrl = &ctrl->ctrl;
 	res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
 	if (res)
 		return res;
 	op->op.fcp_req.first_sgl = &op->sgl[0];
 	op->op.fcp_req.private = &op->priv[0];
+	nvme_req(rq)->ctrl = &ctrl->ctrl;
 	return res;
 }
 
-- 
cgit