From efa56305908ba20de2104f1b8508c6a7401833be Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 22 Dec 2023 16:17:48 +0100 Subject: nvmet-tcp: Fix a kernel panic when host sends an invalid H2C PDU length If the host sends an H2CData command with an invalid DATAL, the kernel may crash in nvmet_tcp_build_pdu_iovec(). Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 lr : nvmet_tcp_io_work+0x6ac/0x718 [nvmet_tcp] Call trace: process_one_work+0x174/0x3c8 worker_thread+0x2d0/0x3e8 kthread+0x104/0x110 Fix the bug by raising a fatal error if DATAL isn't coherent with the packet size. Also, the PDU length should never exceed the MAXH2CDATA parameter which has been communicated to the host in nvmet_tcp_handle_icreq(). Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 4cc27856aa8f..ad16795934b8 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -24,6 +24,7 @@ #include "nvmet.h" #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) +#define NVMET_TCP_MAXH2CDATA 0x400000 /* 16M arbitrary limit */ static int param_store_val(const char *str, int *val, int min, int max) { @@ -923,7 +924,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) icresp->hdr.pdo = 0; icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); - icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */ + icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA); icresp->cpda = 0; if (queue->hdr_digest) icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; @@ -978,6 +979,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) { struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; + unsigned int plen; if (likely(queue->nr_cmds)) { if (unlikely(data->ttag >= queue->nr_cmds)) { @@ -1001,7 +1003,16 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) return -EPROTO; } + plen = le32_to_cpu(data->hdr.plen); cmd->pdu_len = le32_to_cpu(data->data_length); + if (unlikely(cmd->pdu_len != (plen - sizeof(*data)) || + cmd->pdu_len == 0 || + cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) { + pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len); + /* FIXME: use proper transport errors */ + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } cmd->pdu_recv = 0; nvmet_tcp_build_pdu_iovec(cmd); queue->cmd = cmd; -- cgit From 0849a5441358cef02586fb2d60f707c0db195628 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 22 Dec 2023 16:17:49 +0100 Subject: nvmet-tcp: fix a crash in nvmet_req_complete() in nvmet_tcp_handle_h2c_data_pdu(), if the host sends a data_offset different from rbytes_done, the driver ends up calling nvmet_req_complete() passing a status error. The problem is that at this point cmd->req is not yet initialized, the kernel will crash after dereferencing a NULL pointer. Fix the bug by replacing the call to nvmet_req_complete() with nvmet_tcp_fatal_error(). Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver") Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ad16795934b8..b4b6a8ac8089 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -998,8 +998,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) data->ttag, le32_to_cpu(data->data_offset), cmd->rbytes_done); /* FIXME: use path and transport errors */ - nvmet_req_complete(&cmd->req, - NVME_SC_INVALID_FIELD | NVME_SC_DNR); + nvmet_tcp_fatal_error(queue); return -EPROTO; } -- cgit From 75011bd0f9c55db523242f9f9a0b0b826165f14b Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 22 Dec 2023 16:17:50 +0100 Subject: nvmet-tcp: remove boilerplate code Simplify the nvmet_tcp_handle_h2c_data_pdu() function by removing boilerplate code. Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index b4b6a8ac8089..3569c1255c5e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -985,8 +985,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) if (unlikely(data->ttag >= queue->nr_cmds)) { pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", queue->idx, data->ttag, queue->nr_cmds); - nvmet_tcp_fatal_error(queue); - return -EPROTO; + goto err_proto; } cmd = &queue->cmds[data->ttag]; } else { @@ -997,9 +996,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) pr_err("ttag %u unexpected data offset %u (expected %u)\n", data->ttag, le32_to_cpu(data->data_offset), cmd->rbytes_done); - /* FIXME: use path and transport errors */ - nvmet_tcp_fatal_error(queue); - return -EPROTO; + goto err_proto; } plen = le32_to_cpu(data->hdr.plen); @@ -1008,9 +1005,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) cmd->pdu_len == 0 || cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) { pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len); - /* FIXME: use proper transport errors */ - nvmet_tcp_fatal_error(queue); - return -EPROTO; + goto err_proto; } cmd->pdu_recv = 0; nvmet_tcp_build_pdu_iovec(cmd); @@ -1018,6 +1013,11 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) queue->rcv_state = NVMET_TCP_RECV_DATA; return 0; + +err_proto: + /* FIXME: use proper transport errors */ + nvmet_tcp_fatal_error(queue); + return -EPROTO; } static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) -- cgit From ef184b8844bf98a2a80fab8eecda1489aed5d97f Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Sun, 31 Dec 2023 14:56:44 +0800 Subject: nvme: tcp: remove unnecessary goto statement There is no requirement to call nvme_tcp_free_queue() for queue deallocation if the pskid is null or the queue allocation fails, as the NVME_TCP_Q_ALLOCATED flag would not be set in such scenarios. Signed-off-by: Guixin Liu Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d79811cfa0ce..5056bcae2f39 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1922,14 +1922,13 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) ctrl->opts->subsysnqn); if (!pskid) { dev_err(ctrl->device, "no valid PSK found\n"); - ret = -ENOKEY; - goto out_free_queue; + return -ENOKEY; } } ret = nvme_tcp_alloc_queue(ctrl, 0, pskid); if (ret) - goto out_free_queue; + return ret; ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); if (ret) -- cgit From 2ad28ce9b98f8b22feaecc0966c706a8ef59cbf0 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 1 Jan 2024 12:35:27 +0200 Subject: nvme: remove unused definition There is no users for NVMF_AUTH_HASH_LEN macro. Reviewed-by: Israel Rukshin Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- include/linux/nvme.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 44325c068b6a..462c21e0e417 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -20,7 +20,6 @@ #define NVMF_TRSVCID_SIZE 32 #define NVMF_TRADDR_SIZE 256 #define NVMF_TSAS_SIZE 256 -#define NVMF_AUTH_HASH_LEN 64 #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" -- cgit From 2abd2c39ada8200ca5f02d483dccfa82799f51a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:14:12 +0000 Subject: nvme-common: mark nvme_tls_psk_prio static Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/keyring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index ee341b83eeba..a5c0431c101c 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -111,7 +111,7 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring, * should be preferred to 'generated' PSKs, * and SHA-384 should be preferred to SHA-256. */ -struct nvme_tls_psk_priority_list { +static struct nvme_tls_psk_priority_list { bool generated; enum nvme_tcp_tls_cipher cipher; } nvme_tls_psk_prio[] = { -- cgit From 3a96bff229d6e3016805fd6c3dba0655ccba01eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:13:29 +0000 Subject: nvmet-tcp: fix a missing endianess conversion in nvmet_tcp_try_peek_pdu No, a __le32 cast doesn't magically byteswap on big-endian systems.. Fixes: 70525e5d82f6 ("nvmet-tcp: peek icreq before starting TLS") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 3569c1255c5e..792828fb91cc 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1778,7 +1778,7 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) (int)sizeof(struct nvme_tcp_icreq_pdu)); if (hdr->type == nvme_tcp_icreq && hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) && - hdr->plen == (__le32)sizeof(struct nvme_tcp_icreq_pdu)) { + hdr->plen == cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu))) { pr_debug("queue %d: icreq detected\n", queue->idx); return len; -- cgit From d3074e9a73e3c0511f1033b15345e2feb9664b3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:58:41 +0000 Subject: nvme: update the explanation for not updating the limits in nvme_config_discard Expeand the comment a bit to explain what is going on. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d144d1acb09a..56107cfc97b7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1743,7 +1743,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, queue->limits.discard_granularity = size; - /* If discard is already enabled, don't reset queue limits */ + /* + * If discard is already enabled, don't reset queue limits. + * + * This works around the fact that the block layer can't cope well with + * updating the hardware limits when overridden through sysfs. This is + * harmless because discard limits in NVMe are purely advisory. + */ if (queue->limits.max_discard_sectors) return; -- cgit From a4be9679aa3e862adcab465122c7678c2b5d40e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:58:42 +0000 Subject: nvme: also skip discard granularity updates in nvme_config_discard Don't just skip the discard sectors and segments but also the granularity if a value was already set before. Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 56107cfc97b7..6c52b0ab382c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1727,7 +1727,6 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, struct nvme_ns_head *head) { struct request_queue *queue = disk->queue; - u32 size = queue_logical_block_size(queue); if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) ctrl->max_discard_sectors = @@ -1741,8 +1740,6 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_granularity = size; - /* * If discard is already enabled, don't reset queue limits. * @@ -1755,6 +1752,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); + queue->limits.discard_granularity = queue_logical_block_size(queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); -- cgit From f29886c249ec2ed566e423fd02f6071b8f0a3346 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:58:43 +0000 Subject: nvme: fix max_discard_sectors calculation ctrl->max_discard_sectors stores a value that is potentially based of the DMRSL field in Identify Controller, which is in units of LBAs and thus dependent on the Format of a namespace. Fix this by moving the calculation of max_discard_sectors entirely into nvme_config_discard and replacing the ctrl->max_discard_sectors value with a local variable so that the calculation is always namespace-specific. Fixes: 1a86924e4f46 ("nvme: fix interpretation of DMRSL") Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 20 +++++++++----------- drivers/nvme/host/nvme.h | 1 - 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6c52b0ab382c..86b9a1c4876f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1727,12 +1727,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, struct nvme_ns_head *head) { struct request_queue *queue = disk->queue; + u32 max_discard_sectors; - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) - ctrl->max_discard_sectors = - nvme_lba_to_sect(head, ctrl->dmrsl); - - if (ctrl->max_discard_sectors == 0) { + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) { + max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl); + } else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { + max_discard_sectors = UINT_MAX; + } else { blk_queue_max_discard_sectors(queue, 0); return; } @@ -1750,7 +1751,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, if (queue->limits.max_discard_sectors) return; - blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); + blk_queue_max_discard_sectors(queue, max_discard_sectors); blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); queue->limits.discard_granularity = queue_logical_block_size(queue); @@ -2911,13 +2912,10 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) struct nvme_id_ctrl_nvm *id; int ret; - if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { - ctrl->max_discard_sectors = UINT_MAX; + if (ctrl->oncs & NVME_CTRL_ONCS_DSM) ctrl->max_discard_segments = NVME_DSM_MAX_RANGES; - } else { - ctrl->max_discard_sectors = 0; + else ctrl->max_discard_segments = 0; - } /* * Even though NVMe spec explicitly states that MDTS is not applicable diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 3dbd187896d8..9a698c49ea03 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -297,7 +297,6 @@ struct nvme_ctrl { u32 max_hw_sectors; u32 max_segments; u32 max_integrity_segments; - u32 max_discard_sectors; u32 max_discard_segments; u32 max_zeroes_sectors; #ifdef CONFIG_BLK_DEV_ZONED -- cgit From 3b946fe1cc149b23dad3a233c77b1475834f4d6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:58:44 +0000 Subject: nvme: simplify the max_discard_segments calculation Just stash away the DMRL value in the nvme_ctrl struture, and leave all interpretation to nvme_config_discard, where we know DSM is supported by the time we're configuring the number of segments. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 13 +++++-------- drivers/nvme/host/nvme.h | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 86b9a1c4876f..50818dbcfa1a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1752,7 +1752,10 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, return; blk_queue_max_discard_sectors(queue, max_discard_sectors); - blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); + if (ctrl->dmrl) + blk_queue_max_discard_segments(queue, ctrl->dmrl); + else + blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); queue->limits.discard_granularity = queue_logical_block_size(queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) @@ -2912,11 +2915,6 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) struct nvme_id_ctrl_nvm *id; int ret; - if (ctrl->oncs & NVME_CTRL_ONCS_DSM) - ctrl->max_discard_segments = NVME_DSM_MAX_RANGES; - else - ctrl->max_discard_segments = 0; - /* * Even though NVMe spec explicitly states that MDTS is not applicable * to the write-zeroes, we are cautious and limit the size to the @@ -2946,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) if (ret) goto free_data; - if (id->dmrl) - ctrl->max_discard_segments = id->dmrl; + ctrl->dmrl = id->dmrl; ctrl->dmrsl = le32_to_cpu(id->dmrsl); if (id->wzsl) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9a698c49ea03..297b80430f1b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -297,13 +297,13 @@ struct nvme_ctrl { u32 max_hw_sectors; u32 max_segments; u32 max_integrity_segments; - u32 max_discard_segments; u32 max_zeroes_sectors; #ifdef CONFIG_BLK_DEV_ZONED u32 max_zone_append; #endif u16 crdt[3]; u16 oncs; + u8 dmrl; u32 dmrsl; u16 oacs; u16 sqsize; -- cgit From 72e8c9379dbef2662c2479c3d142e4c44d598a5b Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 16:30:50 +0100 Subject: nvmet-fc: remove unnecessary bracket There is no need for the bracket around the identifier. Remove it. Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index bd59990b5250..bda7a3009e85 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1031,7 +1031,7 @@ nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) list_for_each_entry(host, &tgtport->host_list, host_list) { if (host->hosthandle == hosthandle && !host->invalid) { if (nvmet_fc_hostport_get(host)) - return (host); + return host; } } -- cgit From 0e716cec6fb11a14c220ee17c404b67962e902f7 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 16:30:51 +0100 Subject: nvmet-trace: avoid dereferencing pointer too early The first command issued from the host to the target is the fabrics connect command. At this point, neither the target queue nor the controller have been allocated. But we already try to trace this command in nvmet_req_init. Reported by KASAN. Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/trace.c | 6 +++--- drivers/nvme/target/trace.h | 28 +++++++++++++++++----------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index bff454d46255..6ee1f3db81d0 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c @@ -211,7 +211,7 @@ const char *nvmet_trace_disk_name(struct trace_seq *p, char *name) return ret; } -const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl) +const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id) { const char *ret = trace_seq_buffer_ptr(p); @@ -224,8 +224,8 @@ const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl) * If we can know the extra data of the connect command in this stage, * we can update this print statement later. */ - if (ctrl) - trace_seq_printf(p, "%d", ctrl->cntlid); + if (ctrl_id) + trace_seq_printf(p, "%d", ctrl_id); else trace_seq_printf(p, "_"); trace_seq_putc(p, 0); diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index 6109b3806b12..2f15070ddc56 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -32,18 +32,24 @@ const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype, nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) : \ nvmet_trace_parse_admin_cmd(p, opcode, cdw10))) -const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl); -#define __print_ctrl_name(ctrl) \ - nvmet_trace_ctrl_name(p, ctrl) +const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id); +#define __print_ctrl_id(ctrl_id) \ + nvmet_trace_ctrl_id(p, ctrl_id) const char *nvmet_trace_disk_name(struct trace_seq *p, char *name); #define __print_disk_name(name) \ nvmet_trace_disk_name(p, name) #ifndef TRACE_HEADER_MULTI_READ -static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req) +static inline u16 nvmet_req_to_ctrl_id(struct nvmet_req *req) { - return req->sq->ctrl; + /* + * The queue and controller pointers are not valid until an association + * has been established. + */ + if (!req->sq || !req->sq->ctrl) + return 0; + return req->sq->ctrl->cntlid; } static inline void __assign_req_name(char *name, struct nvmet_req *req) @@ -63,7 +69,7 @@ TRACE_EVENT(nvmet_req_init, TP_ARGS(req, cmd), TP_STRUCT__entry( __field(struct nvme_command *, cmd) - __field(struct nvmet_ctrl *, ctrl) + __field(u16, ctrl_id) __array(char, disk, DISK_NAME_LEN) __field(int, qid) __field(u16, cid) @@ -76,7 +82,7 @@ TRACE_EVENT(nvmet_req_init, ), TP_fast_assign( __entry->cmd = cmd; - __entry->ctrl = nvmet_req_to_ctrl(req); + __entry->ctrl_id = nvmet_req_to_ctrl_id(req); __assign_req_name(__entry->disk, req); __entry->qid = req->sq->qid; __entry->cid = cmd->common.command_id; @@ -90,7 +96,7 @@ TRACE_EVENT(nvmet_req_init, ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, " "meta=%#llx, cmd=(%s, %s)", - __print_ctrl_name(__entry->ctrl), + __print_ctrl_id(__entry->ctrl_id), __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, @@ -104,7 +110,7 @@ TRACE_EVENT(nvmet_req_complete, TP_PROTO(struct nvmet_req *req), TP_ARGS(req), TP_STRUCT__entry( - __field(struct nvmet_ctrl *, ctrl) + __field(u16, ctrl_id) __array(char, disk, DISK_NAME_LEN) __field(int, qid) __field(int, cid) @@ -112,7 +118,7 @@ TRACE_EVENT(nvmet_req_complete, __field(u16, status) ), TP_fast_assign( - __entry->ctrl = nvmet_req_to_ctrl(req); + __entry->ctrl_id = nvmet_req_to_ctrl_id(req); __entry->qid = req->cq->qid; __entry->cid = req->cqe->command_id; __entry->result = le64_to_cpu(req->cqe->result.u64); @@ -120,7 +126,7 @@ TRACE_EVENT(nvmet_req_complete, __assign_req_name(__entry->disk, req); ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x", - __print_ctrl_name(__entry->ctrl), + __print_ctrl_id(__entry->ctrl_id), __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->result, __entry->status) -- cgit From f644d21baab34c837d639e9639aa8204dba7f3cf Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 16:30:53 +0100 Subject: nvmet-fcloop: Remove remote port from list when unlinking The remote port is removed too late from fcloop_nports list. Remove it when port is unregistered. This prevents a busy loop in fcloop_exit, because it is possible the remote port is found in the list and thus we will never progress. The kernel log will be spammed with nvme_fcloop: fcloop_exit: Failed deleting remote port nvme_fcloop: fcloop_exit: Failed deleting target port Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/target/fcloop.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c65a73433c05..ead349af30f1 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -995,11 +995,6 @@ fcloop_nport_free(struct kref *ref) { struct fcloop_nport *nport = container_of(ref, struct fcloop_nport, ref); - unsigned long flags; - - spin_lock_irqsave(&fcloop_lock, flags); - list_del(&nport->nport_list); - spin_unlock_irqrestore(&fcloop_lock, flags); kfree(nport); } @@ -1357,6 +1352,8 @@ __unlink_remote_port(struct fcloop_nport *nport) nport->tport->remoteport = NULL; nport->rport = NULL; + list_del(&nport->nport_list); + return rport; } -- cgit From bd029a02ce46e77e4d553140b039f93cf78ee8c1 Mon Sep 17 00:00:00 2001 From: "Jim.Lin" Date: Tue, 28 Nov 2023 10:57:37 +0800 Subject: nvme-pci: disable write zeroes for SK Hynix BC901 SK Hynix BC901 drive write zero will cause Chromebook takes more than 20 mins to switch to developer mode "disable write zeroes" can fix this issue and Sk Hynix has been verified. Signed-off-by: Jim.Lin Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 507bc149046d..f27202680741 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3394,6 +3394,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1c5c, 0x1D59), /* SK Hynix BC901 */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ -- cgit From bafd590910d00327decb3937e77f6f11c3e80e4b Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 27 Dec 2023 17:31:06 +0800 Subject: nvme: introduce nvme_disk_is_ns_head helper We currently rely on gendisk's file operations (fops) to distinguish between a namespace head (ns_head) and a regular namespace. To enhance code readability, introduce a helper function. Additionally, we must ensure that the device is not an ns_head before calling nvme_get_ns_from_dev(). To enforce this, add a WARN_ON check within the nvme_get_ns_from_dev(). Signed-off-by: Guixin Liu Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Liu Song [include fix: https://lore.kernel.org/oe-kbuild-all/202401031943.0N72Tkji-lkp@intel.com/] Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 13 ++++++++++++- drivers/nvme/host/pr.c | 2 +- drivers/nvme/host/sysfs.c | 8 ++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 297b80430f1b..6092cc361837 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -920,6 +920,10 @@ extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; +static inline bool nvme_disk_is_ns_head(struct gendisk *disk) +{ + return disk->fops == &nvme_ns_head_ops; +} #else #define multipath false static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) @@ -997,6 +1001,10 @@ static inline void nvme_mpath_start_request(struct request *rq) static inline void nvme_mpath_end_request(struct request *rq) { } +static inline bool nvme_disk_is_ns_head(struct gendisk *disk) +{ + return false; +} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_revalidate_zones(struct nvme_ns *ns); @@ -1025,7 +1033,10 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { - return dev_to_disk(dev)->private_data; + struct gendisk *disk = dev_to_disk(dev); + + WARN_ON(nvme_disk_is_ns_head(disk)); + return disk->private_data; } #ifdef CONFIG_NVME_HWMON diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 391b1465ebfd..fc3eed00f9ff 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -98,7 +98,7 @@ static int nvme_send_pr_command(struct block_device *bdev, struct nvme_command *c, void *data, unsigned int data_len) { if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && - bdev->bd_disk->fops == &nvme_ns_head_ops) + nvme_disk_is_ns_head(bdev->bd_disk)) return nvme_send_ns_head_pr_command(bdev, c, data, data_len); return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data, diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index ac24ad102380..754e91111042 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -39,10 +39,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); - if (disk->fops == &nvme_bdev_ops) - return nvme_get_ns_from_dev(dev)->head; - else + if (nvme_disk_is_ns_head(disk)) return disk->private_data; + return nvme_get_ns_from_dev(dev)->head; } static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, @@ -233,7 +232,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, } #ifdef CONFIG_NVME_MULTIPATH if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) { - if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */ + /* per-path attr */ + if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; -- cgit From 4ee7ffeb4ce50c80bc4504db6f39b25a2df6bcf4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Jan 2024 16:56:55 +0100 Subject: nvmet: re-fix tracing strncpy() warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An earlier patch had tried to address a warning about a string copy with missing zero termination: drivers/nvme/target/trace.h:52:3: warning: ‘strncpy’ specified bound 32 equals destination size [-Wstringop-truncation] The new version causes a different warning with some compiler versions, notably gcc-9 and gcc-10, and also misses the zero padding that was apparently done intentionally in the original code: drivers/nvme/target/trace.h:56:2: error: 'strncpy' specified bound depends on the length of the source argument [-Werror=stringop-overflow=] Change it to use strscpy_pad() with the original length, which will give a properly padded and zero-terminated string as well as avoiding the warning. Fixes: d86481e924a7 ("nvmet: use min of device_path and disk len") Signed-off-by: Arnd Bergmann Signed-off-by: Keith Busch --- drivers/nvme/target/trace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index 2f15070ddc56..89020018a0e3 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -59,8 +59,7 @@ static inline void __assign_req_name(char *name, struct nvmet_req *req) return; } - strncpy(name, req->ns->device_path, - min_t(size_t, DISK_NAME_LEN, strlen(req->ns->device_path))); + strscpy_pad(name, req->ns->device_path, DISK_NAME_LEN); } #endif -- cgit From a7de1dea76cd6a3707707af4ea2f8bc3cdeaeb11 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Jan 2024 16:56:56 +0100 Subject: nvme: trace: avoid memcpy overflow warning A previous patch introduced a struct_group() in nvme_common_command to help stringop fortification figure out the length of the fields, but one function is not currently using them: In file included from drivers/nvme/target/core.c:7: In file included from include/linux/string.h:254: include/linux/fortify-string.h:592:4: error: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Werror,-Wattribute-warning] __read_overflow2_field(q_size_field, size); ^ Change this one to use the correct field name to avoid the warning. Fixes: 5c629dc9609dc ("nvme: use struct group for generic command dwords") Signed-off-by: Arnd Bergmann Signed-off-by: Keith Busch --- drivers/nvme/target/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index 89020018a0e3..7f7ebf9558e5 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -90,7 +90,7 @@ TRACE_EVENT(nvmet_req_init, __entry->flags = cmd->common.flags; __entry->nsid = le32_to_cpu(cmd->common.nsid); __entry->metadata = le64_to_cpu(cmd->common.metadata); - memcpy(__entry->cdw10, &cmd->common.cdw10, + memcpy(__entry->cdw10, &cmd->common.cdws, sizeof(__entry->cdw10)); ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, " -- cgit From 172fb49600c2b96a4ade255fbfc3fe7098b8afd3 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 6 Dec 2023 10:48:30 -0800 Subject: nvme-pci: enhance timeout kernel log Kernel configs don't necessarily have opcode decoding, and some opcodes are not even decodable. It is still interesting for debugging SSD issues to know what opcode is timing out, what request type it came from, and the data size (if applicable). Also print the command_id along side blk-mq's tag to help match commands with protocol wire traces and firmware logs, Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f27202680741..75e763ce09aa 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) struct request *abort_req; struct nvme_command cmd = { }; u32 csts = readl(dev->bar + NVME_REG_CSTS); + u8 opcode; /* If PCI error recovery process is happening, we cannot reset or * the recovery mechanism will surely fail. @@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, completion polled\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, completion polled\n", + req->tag, nvme_cid(req), nvmeq->qid); return BLK_EH_DONE; } @@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) fallthrough; case NVME_CTRL_DELETING: dev_warn_ratelimited(dev->ctrl.device, - "I/O %d QID %d timeout, disable controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, disable controller\n", + req->tag, nvme_cid(req), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; nvme_dev_disable(dev, true); return BLK_EH_DONE; @@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * command was already aborted once before and still hasn't been * returned to the driver, or if this is the admin queue. */ + opcode = nvme_req(req)->cmd->common.opcode; if (!nvmeq->qid || iod->aborted) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, reset controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", + req->tag, nvme_cid(req), opcode, + nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; goto disable; } @@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) cmd.abort.sqid = cpu_to_le16(nvmeq->qid); dev_warn(nvmeq->dev->ctrl.device, - "I/O %d (%s) QID %d timeout, aborting\n", - req->tag, - nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode), - nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n", + req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode), + nvmeq->qid, blk_op_str(req_op(req)), req_op(req), + blk_rq_bytes(req)); abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT); -- cgit From a5c1a87ce0876192d4343cdf5f0a22d737eb0ec3 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 7 Jan 2024 02:29:49 +0200 Subject: nvme-rdma: enhance timeout kernel log Print the command_id along side blk-mq's tag to help match commands with protocol wire traces and logs. Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index bc90ec3c51b0..2e77c0f25f71 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1941,9 +1941,14 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct nvme_rdma_ctrl *ctrl = queue->ctrl; - - dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", - rq->tag, nvme_rdma_queue_idx(queue)); + u8 opcode = req->req.cmd->common.opcode; + u8 fctype = req->req.cmd->fabrics.fctype; + int qid = nvme_rdma_queue_idx(queue); + + dev_warn(ctrl->ctrl.device, + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n", + rq->tag, nvme_cid(rq), opcode, + nvme_opcode_str(qid, opcode, fctype), qid); if (ctrl->ctrl.state != NVME_CTRL_LIVE) { /* -- cgit From 45c36f04f1beb7c4c45f5910a1f69d6d9d5a19fb Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 7 Jan 2024 02:29:50 +0200 Subject: nvme-tcp: enhance timeout kernel log Print the command_id along side blk-mq's tag to help match commands with protocol wire traces and logs. Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 5056bcae2f39..b234f0674aeb 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2425,9 +2425,9 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, - "queue %d: timeout cid %#x type %d opcode %#x (%s)\n", - nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type, - opc, nvme_opcode_str(qid, opc, fctype)); + "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", + rq->tag, nvme_cid(rq), pdu->hdr.type, opc, + nvme_opcode_str(qid, opc, fctype), qid); if (ctrl->state != NVME_CTRL_LIVE) { /* -- cgit From 9a1abc24850eb759e36a2f8869161c3b7254c904 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 5 Jan 2024 09:14:44 +0100 Subject: nvmet-tcp: Fix the H2C expected PDU len calculation The nvmet_tcp_handle_h2c_data_pdu() function should take into consideration the possibility that the header digest and/or the data digests are enabled when calculating the expected PDU length, before comparing it to the value stored in cmd->pdu_len. Fixes: efa56305908b ("nvmet-tcp: Fix a kernel panic when host sends an invalid H2C PDU length") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 792828fb91cc..4dc60cbcb205 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -979,7 +979,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) { struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; - unsigned int plen; + unsigned int exp_data_len; if (likely(queue->nr_cmds)) { if (unlikely(data->ttag >= queue->nr_cmds)) { @@ -999,9 +999,13 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) goto err_proto; } - plen = le32_to_cpu(data->hdr.plen); + exp_data_len = le32_to_cpu(data->hdr.plen) - + nvmet_tcp_hdgst_len(queue) - + nvmet_tcp_ddgst_len(queue) - + sizeof(*data); + cmd->pdu_len = le32_to_cpu(data->data_length); - if (unlikely(cmd->pdu_len != (plen - sizeof(*data)) || + if (unlikely(cmd->pdu_len != exp_data_len || cmd->pdu_len == 0 || cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) { pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len); -- cgit From 06c59d427017fcde3107c236177fcc74c9db7909 Mon Sep 17 00:00:00 2001 From: William Butler Date: Wed, 10 Jan 2024 18:28:55 +0000 Subject: nvme-pci: set doorbell config before unquiescing During resets, if queues are unquiesced first, then the host can submit IOs to the controller using shadow doorbell logic but the controller won't be aware. This can lead to necessary MMIO doorbells from being not issued, causing requests to be delayed and timed-out. Signed-off-by: William Butler Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 75e763ce09aa..46d3897b8986 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2746,10 +2746,10 @@ static void nvme_reset_work(struct work_struct *work) * controller around but remove all namespaces. */ if (dev->online_queues > 1) { + nvme_dbbuf_set(dev); nvme_unquiesce_io_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); nvme_pci_update_nr_queues(dev); - nvme_dbbuf_set(dev); nvme_unfreeze(&dev->ctrl); } else { dev_warn(dev->ctrl.device, "IO queues lost\n"); -- cgit From 07a29b134ce8e47aef15ea71eab8e6b3734a9720 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 8 Dec 2023 13:53:20 +0100 Subject: nvmet-tcp: avoid circular locking dependency on install_queue() nvmet_tcp_install_queue() is driven from the ->io_work workqueue function, but will call flush_workqueue() which might trigger ->release_work() which in itself calls flush_work on ->io_work. To avoid that check for pending queue in disconnecting status, and return 'controller busy' when we reached a certain threshold. Signed-off-by: Hannes Reinecke Tested-by: Shin'ichiro Kawasaki Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 4dc60cbcb205..6a1e6bb80062 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -25,6 +25,7 @@ #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) #define NVMET_TCP_MAXH2CDATA 0x400000 /* 16M arbitrary limit */ +#define NVMET_TCP_BACKLOG 128 static int param_store_val(const char *str, int *val, int min, int max) { @@ -2067,7 +2068,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) goto err_sock; } - ret = kernel_listen(port->sock, 128); + ret = kernel_listen(port->sock, NVMET_TCP_BACKLOG); if (ret) { pr_err("failed to listen %d on port sock\n", ret); goto err_sock; @@ -2133,8 +2134,19 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) container_of(sq, struct nvmet_tcp_queue, nvme_sq); if (sq->qid == 0) { - /* Let inflight controller teardown complete */ - flush_workqueue(nvmet_wq); + struct nvmet_tcp_queue *q; + int pending = 0; + + /* Check for pending controller teardown */ + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(q, &nvmet_tcp_queue_list, queue_list) { + if (q->nvme_sq.ctrl == sq->ctrl && + q->state == NVMET_TCP_Q_DISCONNECTING) + pending++; + } + mutex_unlock(&nvmet_tcp_queue_mutex); + if (pending > NVMET_TCP_BACKLOG) + return NVME_SC_CONNECT_CTRL_BUSY; } queue->nr_cmds = sq->size * 2; -- cgit From 31deaeb11ba7a885116c9c30892b9f763c04d59c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 8 Dec 2023 13:53:21 +0100 Subject: nvmet-rdma: avoid circular locking dependency on install_queue() nvmet_rdma_install_queue() is driven from the ->io_work workqueue function, but will call flush_workqueue() which might trigger ->release_work() which in itself calls flush_work on ->io_work. To avoid that check for pending queue in disconnecting status, and return 'controller busy' when we reached a certain threshold. Signed-off-by: Hannes Reinecke Tested-by: Shin'ichiro Kawasaki Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/rdma.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 4597bca43a6d..667f9c04f35d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -37,6 +37,8 @@ #define NVMET_RDMA_MAX_MDTS 8 #define NVMET_RDMA_MAX_METADATA_MDTS 5 +#define NVMET_RDMA_BACKLOG 128 + struct nvmet_rdma_srq; struct nvmet_rdma_cmd { @@ -1583,8 +1585,19 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, } if (queue->host_qid == 0) { - /* Let inflight controller teardown complete */ - flush_workqueue(nvmet_wq); + struct nvmet_rdma_queue *q; + int pending = 0; + + /* Check for pending controller teardown */ + mutex_lock(&nvmet_rdma_queue_mutex); + list_for_each_entry(q, &nvmet_rdma_queue_list, queue_list) { + if (q->nvme_sq.ctrl == queue->nvme_sq.ctrl && + q->state == NVMET_RDMA_Q_DISCONNECTING) + pending++; + } + mutex_unlock(&nvmet_rdma_queue_mutex); + if (pending > NVMET_RDMA_BACKLOG) + return NVME_SC_CONNECT_CTRL_BUSY; } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); @@ -1880,7 +1893,7 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } - ret = rdma_listen(cm_id, 128); + ret = rdma_listen(cm_id, NVMET_RDMA_BACKLOG); if (ret) { pr_err("listening to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; -- cgit