Diffstat (limited to 'drivers/nvme/target/core.c')
 -rw-r--r--  drivers/nvme/target/core.c | 150
 1 file changed, 95 insertions(+), 55 deletions(-)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 25cc2ee8de3f..ac7210a3ea1c 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -16,6 +16,7 @@
#include "nvmet.h"
struct workqueue_struct *buffered_io_wq;
+struct workqueue_struct *zbd_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);
@@ -43,43 +44,34 @@ DECLARE_RWSEM(nvmet_ana_sem);
inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
- u16 status;
-
switch (errno) {
case 0:
- status = NVME_SC_SUCCESS;
- break;
+ return NVME_SC_SUCCESS;
case -ENOSPC:
req->error_loc = offsetof(struct nvme_rw_command, length);
- status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
- break;
+ return NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
case -EREMOTEIO:
req->error_loc = offsetof(struct nvme_rw_command, slba);
- status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
- break;
+ return NVME_SC_LBA_RANGE | NVME_SC_DNR;
case -EOPNOTSUPP:
req->error_loc = offsetof(struct nvme_common_command, opcode);
switch (req->cmd->common.opcode) {
case nvme_cmd_dsm:
case nvme_cmd_write_zeroes:
- status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
- break;
+ return NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
default:
- status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
break;
case -ENODATA:
req->error_loc = offsetof(struct nvme_rw_command, nsid);
- status = NVME_SC_ACCESS_DENIED;
- break;
+ return NVME_SC_ACCESS_DENIED;
case -EIO:
fallthrough;
default:
req->error_loc = offsetof(struct nvme_common_command, opcode);
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ return NVME_SC_INTERNAL | NVME_SC_DNR;
}
-
- return status;
}
u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
@@ -122,11 +114,11 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
return 0;
}
-static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
+static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
{
- unsigned long nsid = 0;
struct nvmet_ns *cur;
unsigned long idx;
+ u32 nsid = 0;
xa_for_each(&subsys->namespaces, idx, cur)
nsid = cur->nsid;
@@ -141,14 +133,13 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
{
- u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
struct nvmet_req *req;
mutex_lock(&ctrl->lock);
while (ctrl->nr_async_event_cmds) {
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, status);
+ nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
mutex_lock(&ctrl->lock);
}
mutex_unlock(&ctrl->lock);
@@ -388,10 +379,10 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
{
struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvmet_ctrl, ka_work);
- bool cmd_seen = ctrl->cmd_seen;
+ bool reset_tbkas = ctrl->reset_tbkas;
- ctrl->cmd_seen = false;
- if (cmd_seen) {
+ ctrl->reset_tbkas = false;
+ if (reset_tbkas) {
pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
ctrl->cntlid);
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
@@ -412,7 +403,6 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
pr_debug("ctrl %d start keep-alive timer for %d secs\n",
ctrl->cntlid, ctrl->kato);
- INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
@@ -693,6 +683,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
uuid_gen(&ns->uuid);
ns->buffered_io = false;
+ ns->csi = NVME_CSI_NVM;
return ns;
}
@@ -804,6 +795,13 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
percpu_ref_exit(&sq->ref);
if (ctrl) {
+ /*
+ * The teardown flow may take some time, and the host may not
+ * send us keep-alive during this period, hence reset the
+ * traffic based keep-alive timer so we don't trigger a
+ * controller teardown as a result of a keep-alive expiration.
+ */
+ ctrl->reset_tbkas = true;
nvmet_ctrl_put(ctrl);
sq->ctrl = NULL; /* allows reusing the queue later */
}
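
The comment added in the hunk above documents the traffic-based keep-alive (TBKAS) behaviour: ctrl->reset_tbkas is set whenever the controller sees activity (nvmet_req_init) or enters a potentially long queue teardown, and nvmet_keep_alive_timer() consumes the flag to re-arm the timer instead of treating the silence as a dead host. The following is a minimal userspace sketch of that flag-and-timer pattern; struct ctrl and keep_alive_expired() are simplified stand-ins, not the nvmet implementation.

/*
 * Sketch of the traffic-based keep-alive reset pattern. Names and the
 * timer plumbing are illustrative stand-ins for the nvmet code above.
 */
#include <stdbool.h>
#include <stdio.h>

struct ctrl {
        bool reset_tbkas;       /* set on command activity or long teardown work */
};

/* Called when the keep-alive period elapses without a Keep Alive command. */
static void keep_alive_expired(struct ctrl *c)
{
        if (c->reset_tbkas) {
                /* Recent traffic (or teardown) counts as liveness: re-arm. */
                c->reset_tbkas = false;
                printf("reschedule keep-alive timer\n");
                return;
        }
        printf("fatal error: host stopped responding\n");
}

int main(void)
{
        struct ctrl c = { .reset_tbkas = true };

        keep_alive_expired(&c);  /* traffic was seen: timer is re-armed */
        keep_alive_expired(&c);  /* no traffic since: treated as a dead host */
        return 0;
}
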
@@ -888,10 +886,18 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
return ret;
}
- if (req->ns->file)
- return nvmet_file_parse_io_cmd(req);
-
- return nvmet_bdev_parse_io_cmd(req);
+ switch (req->ns->csi) {
+ case NVME_CSI_NVM:
+ if (req->ns->file)
+ return nvmet_file_parse_io_cmd(req);
+ return nvmet_bdev_parse_io_cmd(req);
+ case NVME_CSI_ZNS:
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ return nvmet_bdev_zns_parse_io_cmd(req);
+ return NVME_SC_INVALID_IO_CMD_SET;
+ default:
+ return NVME_SC_INVALID_IO_CMD_SET;
+ }
}
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
@@ -952,7 +958,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
}
if (sq->ctrl)
- sq->ctrl->cmd_seen = true;
+ sq->ctrl->reset_tbkas = true;
return true;
@@ -998,19 +1004,23 @@ static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
return req->transfer_len - req->metadata_len;
}
-static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req)
+static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
+ struct nvmet_req *req)
{
- req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt,
+ req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
nvmet_data_transfer_len(req));
if (!req->sg)
goto out_err;
if (req->metadata_len) {
- req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev,
+ req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
&req->metadata_sg_cnt, req->metadata_len);
if (!req->metadata_sg)
goto out_free_sg;
}
+
+ req->p2p_dev = p2p_dev;
+
return 0;
out_free_sg:
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
@@ -1018,25 +1028,19 @@ out_err:
return -ENOMEM;
}
-static bool nvmet_req_find_p2p_dev(struct nvmet_req *req)
+static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
- if (!IS_ENABLED(CONFIG_PCI_P2PDMA))
- return false;
-
- if (req->sq->ctrl && req->sq->qid && req->ns) {
- req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
- req->ns->nsid);
- if (req->p2p_dev)
- return true;
- }
-
- req->p2p_dev = NULL;
- return false;
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
+ !req->sq->ctrl || !req->sq->qid || !req->ns)
+ return NULL;
+ return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
}
int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
- if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req))
+ struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);
+
+ if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
return 0;
req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
@@ -1065,6 +1069,7 @@ void nvmet_req_free_sgls(struct nvmet_req *req)
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
if (req->metadata_sg)
pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
+ req->p2p_dev = NULL;
} else {
sgl_free(req->sg);
if (req->metadata_sg)
@@ -1113,6 +1118,17 @@ static inline u8 nvmet_cc_iocqes(u32 cc)
return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}
+static inline bool nvmet_css_supported(u8 cc_css)
+{
+ switch (cc_css <<= NVME_CC_CSS_SHIFT) {
+ case NVME_CC_CSS_NVM:
+ case NVME_CC_CSS_CSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
lockdep_assert_held(&ctrl->lock);
@@ -1132,7 +1148,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
if (nvmet_cc_mps(ctrl->cc) != 0 ||
nvmet_cc_ams(ctrl->cc) != 0 ||
- nvmet_cc_css(ctrl->cc) != 0) {
+ !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) {
ctrl->csts = NVME_CSTS_CFS;
return;
}
@@ -1183,6 +1199,8 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
/* command sets supported: NVMe command set: */
ctrl->cap = (1ULL << 37);
+ /* Controller supports one or more I/O Command Sets */
+ ctrl->cap |= (1ULL << 43);
/* CC.EN timeout in 500msec units: */
ctrl->cap |= (15ULL << 24);
/* maximum queue entries supported: */
@@ -1352,6 +1370,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
INIT_LIST_HEAD(&ctrl->async_events);
INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
+ INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
@@ -1372,7 +1391,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
goto out_free_changed_ns_list;
if (subsys->cntlid_min > subsys->cntlid_max)
- goto out_free_changed_ns_list;
+ goto out_free_sqs;
ret = ida_simple_get(&cntlid_ida,
subsys->cntlid_min, subsys->cntlid_max,
@@ -1493,6 +1512,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
enum nvme_subsys_type type)
{
struct nvmet_subsys *subsys;
+ char serial[NVMET_SN_MAX_SIZE / 2];
+ int ret;
subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
if (!subsys)
@@ -1500,7 +1521,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
subsys->ver = NVMET_DEFAULT_VS;
/* generate a random serial number as our controllers are ephemeral: */
- get_random_bytes(&subsys->serial, sizeof(subsys->serial));
+ get_random_bytes(&serial, sizeof(serial));
+ bin2hex(subsys->serial, &serial, sizeof(serial));
+
+ subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
+ if (!subsys->model_number) {
+ ret = -ENOMEM;
+ goto free_subsys;
+ }
switch (type) {
case NVME_NQN_NVME:
@@ -1511,15 +1539,15 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
break;
default:
pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
- kfree(subsys);
- return ERR_PTR(-EINVAL);
+ ret = -EINVAL;
+ goto free_mn;
}
subsys->type = type;
subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
GFP_KERNEL);
if (!subsys->subsysnqn) {
- kfree(subsys);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto free_mn;
}
subsys->cntlid_min = NVME_CNTLID_MIN;
subsys->cntlid_max = NVME_CNTLID_MAX;
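
The serial-number change two hunks above draws NVMET_SN_MAX_SIZE / 2 random bytes and hex-encodes them with bin2hex(), so the exported serial is printable ASCII rather than raw random bytes. Below is a standalone sketch of the same idea; it assumes the 20-character serial length of NVMET_SN_MAX_SIZE in this era of the code, uses a local bin2hex_sketch() in place of the kernel's bin2hex(), and uses rand() as a stand-in for get_random_bytes().

/*
 * Sketch: hex-encode N/2 random bytes into an N-character printable serial.
 * SN_MAX_SIZE, bin2hex_sketch() and rand() are illustrative stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>

#define SN_MAX_SIZE 20  /* stand-in for NVMET_SN_MAX_SIZE */

static void bin2hex_sketch(char *dst, const unsigned char *src, size_t n)
{
        static const char hex[] = "0123456789abcdef";

        while (n--) {
                *dst++ = hex[*src >> 4];
                *dst++ = hex[*src++ & 0xf];
        }
}

int main(void)
{
        unsigned char raw[SN_MAX_SIZE / 2];
        char serial[SN_MAX_SIZE + 1] = { 0 };

        for (size_t i = 0; i < sizeof(raw); i++)
                raw[i] = rand() & 0xff;  /* get_random_bytes() stand-in */

        bin2hex_sketch(serial, raw, sizeof(raw));
        printf("serial: %s\n", serial);  /* 20 lowercase hex characters */
        return 0;
}
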
@@ -1531,6 +1559,12 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
INIT_LIST_HEAD(&subsys->hosts);
return subsys;
+
+free_mn:
+ kfree(subsys->model_number);
+free_subsys:
+ kfree(subsys);
+ return ERR_PTR(ret);
}
static void nvmet_subsys_free(struct kref *ref)
@@ -1569,11 +1603,15 @@ static int __init nvmet_init(void)
nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
+ zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
+ if (!zbd_wq)
+ return -ENOMEM;
+
buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
WQ_MEM_RECLAIM, 0);
if (!buffered_io_wq) {
error = -ENOMEM;
- goto out;
+ goto out_free_zbd_work_queue;
}
error = nvmet_init_discovery();
@@ -1589,7 +1627,8 @@ out_exit_discovery:
nvmet_exit_discovery();
out_free_work_queue:
destroy_workqueue(buffered_io_wq);
-out:
+out_free_zbd_work_queue:
+ destroy_workqueue(zbd_wq);
return error;
}
@@ -1599,6 +1638,7 @@ static void __exit nvmet_exit(void)
nvmet_exit_discovery();
ida_destroy(&cntlid_ida);
destroy_workqueue(buffered_io_wq);
+ destroy_workqueue(zbd_wq);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);