aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <[email protected]>2020-05-27 05:17:10 -0600
committerJens Axboe <[email protected]>2020-05-27 05:17:10 -0600
commitd92f79a55232405d66ca343294f873cefd04ddd7 (patch)
tree2dae0e98d542d1b1c4fd5273f7775a34ce668375
parent263c61581a38d0a5ad1f5f4a9143b27d68caeffd (diff)
parent6b6e89636f51581895922780c3c4fd51bb9e1483 (diff)
Merge branch 'nvme-5.8' of git://git.infradead.org/nvme into for-5.8/drivers
Pull NVMe updates from Christoph: "The second large batch of nvme updates: - t10 protection information support for nvme-rdma and nvmet-rdma (Israel Rukshin and Max Gurtovoy) - target side AEN improvements (Chaitanya Kulkarni) - various fixes and minor improvements all over, including the nvme part of the lpfc driver" * 'nvme-5.8' of git://git.infradead.org/nvme: (38 commits) lpfc: Fix return value in __lpfc_nvme_ls_abort lpfc: fix axchg pointer reference after free and double frees lpfc: Fix pointer checks and comments in LS receive refactoring nvme: set dma alignment to qword nvmet: cleanups the loop in nvmet_async_events_process nvmet: fix memory leak when removing namespaces and controllers concurrently nvmet-rdma: add metadata/T10-PI support nvmet: add metadata support for block devices nvmet: add metadata/T10-PI support nvme: add Metadata Capabilities enumerations nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len nvmet: rename nvmet_rw_len to nvmet_rw_data_len nvmet: add metadata characteristics for a namespace nvme-rdma: add metadata/T10-PI support nvme-rdma: introduce nvme_rdma_sgl structure nvme: introduce NVME_INLINE_METADATA_SG_CNT nvme: enforce extended LBA format for fabrics metadata nvme: introduce max_integrity_segments ctrl attribute nvme: make nvme_ns_has_pi accessible to transports nvme: introduce NVME_NS_METADATA_SUPPORTED flag ...
-rw-r--r--drivers/nvme/host/core.c97
-rw-r--r--drivers/nvme/host/fc.c6
-rw-r--r--drivers/nvme/host/lightnvm.c7
-rw-r--r--drivers/nvme/host/nvme.h18
-rw-r--r--drivers/nvme/host/pci.c28
-rw-r--r--drivers/nvme/host/rdma.c321
-rw-r--r--drivers/nvme/host/tcp.c11
-rw-r--r--drivers/nvme/target/Kconfig1
-rw-r--r--drivers/nvme/target/admin-cmd.c45
-rw-r--r--drivers/nvme/target/configfs.c90
-rw-r--r--drivers/nvme/target/core.c166
-rw-r--r--drivers/nvme/target/discovery.c8
-rw-r--r--drivers/nvme/target/fabrics-cmd.c15
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c113
-rw-r--r--drivers/nvme/target/io-cmd-file.c6
-rw-r--r--drivers/nvme/target/nvmet.h34
-rw-r--r--drivers/nvme/target/rdma.c238
-rw-r--r--drivers/nvme/target/tcp.c53
-rw-r--r--drivers/nvme/target/trace.h28
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c29
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c10
-rw-r--r--include/linux/genhd.h4
-rw-r--r--include/linux/nvme.h8
24 files changed, 1116 insertions, 222 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 805d289e6cd9..569671e264b5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -19,7 +19,6 @@
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
-#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
@@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
nvme_put_ctrl(ctrl);
}
-static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
-{
- return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
-}
-
static blk_status_t nvme_error_status(u16 status)
{
switch (status & 0x7ff) {
@@ -552,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
- return ret;
+ goto out_disable_stream;
ctrl->nssa = le16_to_cpu(s.nssa);
if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
dev_info(ctrl->device, "too few streams (%u) available\n",
ctrl->nssa);
- nvme_disable_streams(ctrl);
- return 0;
+ goto out_disable_stream;
}
ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
return 0;
+
+out_disable_stream:
+ nvme_disable_streams(ctrl);
+ return ret;
}
/*
@@ -1302,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
meta_len = (io.nblocks + 1) * ns->ms;
metadata = nvme_to_user_ptr(io.metadata);
- if (ns->ext) {
+ if (ns->features & NVME_NS_EXT_LBAS) {
length += meta_len;
meta_len = 0;
} else if (meta_len) {
@@ -1696,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+ u32 max_integrity_segments)
{
struct blk_integrity integrity;
@@ -1719,10 +1717,11 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
}
integrity.tuple_size = ms;
blk_integrity_register(disk, &integrity);
- blk_queue_max_integrity_segments(disk->queue, 1);
+ blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+ u32 max_integrity_segments)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1842,7 +1841,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
- u32 atomic_bs, phys_bs, io_opt;
+ u32 atomic_bs, phys_bs, io_opt = 0;
if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
@@ -1851,7 +1850,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
- atomic_bs = phys_bs = io_opt = bs;
+ atomic_bs = phys_bs = bs;
nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
if (id->nabo == 0) {
/*
@@ -1882,13 +1881,28 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_queue_io_min(disk->queue, phys_bs);
blk_queue_io_opt(disk->queue, io_opt);
- if (ns->ms && !ns->ext &&
- (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
- nvme_init_integrity(disk, ns->ms, ns->pi_type);
- if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
- ns->lba_shift > PAGE_SHIFT)
+ /*
+ * The block layer can't support LBA sizes larger than the page size
+ * yet, so catch this early and don't allow block I/O.
+ */
+ if (ns->lba_shift > PAGE_SHIFT)
capacity = 0;
+ /*
+ * Register a metadata profile for PI, or the plain non-integrity NVMe
+ * metadata masquerading as Type 0 if supported, otherwise reject block
+ * I/O to namespaces with metadata except when the namespace supports
+ * PI, as it can strip/insert in that case.
+ */
+ if (ns->ms) {
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ (ns->features & NVME_NS_METADATA_SUPPORTED))
+ nvme_init_integrity(disk, ns->ms, ns->pi_type,
+ ns->ctrl->max_integrity_segments);
+ else if (!nvme_ns_has_pi(ns))
+ capacity = 0;
+ }
+
set_capacity_revalidate_and_notify(disk, capacity, false);
nvme_config_discard(disk, ns);
@@ -1902,9 +1916,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue);
}
-static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
+static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
u32 iob;
/*
@@ -1915,20 +1930,43 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->lba_shift == 0)
ns->lba_shift = 9;
- if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
- is_power_of_2(ns->ctrl->max_hw_sectors))
- iob = ns->ctrl->max_hw_sectors;
+ if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+ is_power_of_2(ctrl->max_hw_sectors))
+ iob = ctrl->max_hw_sectors;
else
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
+ ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
- ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
else
ns->pi_type = 0;
+ if (ns->ms) {
+ /*
+ * For PCIe only the separate metadata pointer is supported,
+ * as the block layer supplies metadata in a separate bio_vec
+ * chain. For Fabrics, only metadata as part of extended data
+ * LBA is supported on the wire per the Fabrics specification,
+ * but the HBA/HCA will do the remapping from the separate
+ * metadata buffers for us.
+ */
+ if (id->flbas & NVME_NS_FLBAS_META_EXT) {
+ ns->features |= NVME_NS_EXT_LBAS;
+ if ((ctrl->ops->flags & NVME_F_FABRICS) &&
+ (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) &&
+ ctrl->max_integrity_segments)
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ } else {
+ if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS))
+ return -EINVAL;
+ if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ }
+ }
+
if (iob)
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_update_disk_info(disk, ns, id);
@@ -1939,6 +1977,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
revalidate_disk(ns->head->disk);
}
#endif
+ return 0;
}
static int nvme_revalidate_disk(struct gendisk *disk)
@@ -1974,7 +2013,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
goto free_id;
}
- __nvme_revalidate_disk(disk, id);
+ ret = __nvme_revalidate_disk(disk, id);
free_id:
kfree(id);
out:
@@ -2283,6 +2322,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
blk_queue_virt_boundary(q, ctrl->page_size - 1);
+ blk_queue_dma_alignment(q, 7);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
blk_queue_write_cache(q, vwc, vwc);
@@ -3628,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
- __nvme_revalidate_disk(disk, id);
+ if (__nvme_revalidate_disk(disk, id))
+ goto out_free_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
ret = nvme_nvm_register(ns, disk_name, node);
@@ -3655,6 +3696,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
/* prevent double queue cleanup */
ns->disk->queue = NULL;
put_disk(ns->disk);
+ out_free_disk:
+ del_gendisk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0b3ab3355e25..cb0007592c12 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -108,7 +108,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
struct scatterlist sgl[NVME_INLINE_SG_CNT];
- uint8_t priv[0];
+ uint8_t priv[];
};
struct nvme_fc_lport {
@@ -3246,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
"while waiting for remoteport connectivity.\n",
- ctrl->cnum, portptr->dev_loss_tmo);
+ ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
+ (ctrl->ctrl.opts->max_reconnects *
+ ctrl->ctrl.opts->reconnect_delay)));
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
}
}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index ec46693f6b64..69608755d415 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl {
__le32 tdresv;
__le32 thresv;
__le32 rsvd2[8];
- __u8 blk[0];
+ __u8 blk[];
};
struct nvme_nvm_id20_addrf {
@@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
geo = &dev->geo;
geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
- geo->ext = ns->ext;
+ if (ns->features & NVME_NS_EXT_LBAS)
+ geo->ext = true;
+ else
+ geo->ext = false;
geo->mdts = ns->ctrl->max_hw_sectors;
dev->q = q;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f3ab17778349..fa5c75501049 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -16,6 +16,7 @@
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>
+#include <linux/t10-pi.h>
#include <trace/events/block.h>
@@ -30,8 +31,10 @@ extern unsigned int admin_timeout;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define NVME_INLINE_SG_CNT 0
+#define NVME_INLINE_METADATA_SG_CNT 0
#else
#define NVME_INLINE_SG_CNT 2
+#define NVME_INLINE_METADATA_SG_CNT 1
#endif
extern struct workqueue_struct *nvme_wq;
@@ -228,6 +231,7 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
u32 max_segments;
+ u32 max_integrity_segments;
u16 crdt[3];
u16 oncs;
u16 oacs;
@@ -364,6 +368,11 @@ struct nvme_ns_head {
#endif
};
+enum nvme_ns_features {
+ NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
+ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
+};
+
struct nvme_ns {
struct list_head list;
@@ -383,8 +392,8 @@ struct nvme_ns {
u16 ms;
u16 sgs;
u32 sws;
- bool ext;
u8 pi_type;
+ unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
@@ -394,6 +403,12 @@ struct nvme_ns {
};
+/* NVMe ns supports metadata actions by the controller (generate/strip) */
+static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
+{
+ return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
+}
+
struct nvme_ctrl_ops {
const char *name;
struct module *module;
@@ -497,7 +512,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
-void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b0978ac554d5..b307c06a783d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -68,14 +68,30 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+static int io_queue_count_set(const char *val, const struct kernel_param *kp)
+{
+ unsigned int n;
+ int ret;
+
+ ret = kstrtouint(val, 10, &n);
+ if (ret != 0 || n > num_possible_cpus())
+ return -EINVAL;
+ return param_set_uint(val, kp);
+}
+
+static const struct kernel_param_ops io_queue_count_ops = {
+ .set = io_queue_count_set,
+ .get = param_get_uint,
+};
+
static unsigned int write_queues;
-module_param(write_queues, uint, 0644);
+module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
static unsigned int poll_queues;
-module_param(poll_queues, uint, 0644);
+module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
@@ -2549,6 +2565,12 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
+ /*
+ * We do not support an SGL for metadata (yet), so we are limited to a
+ * single integrity segment for the separate metadata pointer.
+ */
+ dev->ctrl.max_integrity_segments = 1;
+
result = nvme_init_identify(&dev->ctrl);
if (result)
goto out;
@@ -3118,8 +3140,6 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
- write_queues = min(write_queues, num_possible_cpus());
- poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cac8a930396a..f8f856dc0c67 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -34,6 +34,11 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 4
+#define NVME_RDMA_DATA_SGL_SIZE \
+ (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT)
+#define NVME_RDMA_METADATA_SGL_SIZE \
+ (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT)
+
struct nvme_rdma_device {
struct ib_device *dev;
struct ib_pd *pd;
@@ -48,6 +53,11 @@ struct nvme_rdma_qe {
u64 dma;
};
+struct nvme_rdma_sgl {
+ int nents;
+ struct sg_table sg_table;
+};
+
struct nvme_rdma_queue;
struct nvme_rdma_request {
struct nvme_request req;
@@ -58,12 +68,12 @@ struct nvme_rdma_request {
refcount_t ref;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
- int nents;
struct ib_reg_wr reg_wr;
struct ib_cqe reg_cqe;
struct nvme_rdma_queue *queue;
- struct sg_table sg_table;
- struct scatterlist first_sgl[];
+ struct nvme_rdma_sgl data_sgl;
+ struct nvme_rdma_sgl *metadata_sgl;
+ bool use_sig_mr;
};
enum nvme_rdma_queue_flags {
@@ -85,6 +95,7 @@ struct nvme_rdma_queue {
struct rdma_cm_id *cm_id;
int cm_error;
struct completion cm_done;
+ bool pi_support;
};
struct nvme_rdma_ctrl {
@@ -261,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = queue->ib_cq;
init_attr.recv_cq = queue->ib_cq;
+ if (queue->pi_support)
+ init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@@ -290,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
if (!req->sqe.data)
return -ENOMEM;
+ /* metadata nvme_rdma_sgl struct is located after command's data SGL */
+ if (queue->pi_support)
+ req->metadata_sgl = (void *)nvme_req(rq) +
+ sizeof(struct nvme_rdma_request) +
+ NVME_RDMA_DATA_SGL_SIZE;
+
req->queue = queue;
return 0;
@@ -400,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
dev = queue->device;
ibdev = dev->dev;
+ if (queue->pi_support)
+ ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
/*
@@ -416,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
nvme_rdma_dev_put(dev);
}
-static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
{
- return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
- ibdev->attrs.max_fast_reg_page_list_len - 1);
+ u32 max_page_list_len;
+
+ if (pi_support)
+ max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
+ else
+ max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;
+
+ return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@@ -476,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
* misaligned we'll end up using two entries for a single data page,
* so one additional entry is required.
*/
- pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
+ pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
@@ -488,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
goto out_destroy_ring;
}
+ if (queue->pi_support) {
+ ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
+ queue->queue_size, IB_MR_TYPE_INTEGRITY,
+ pages_per_mr, pages_per_mr);
+ if (ret) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to initialize PI MR pool sized %d for QID %d\n",
+ queue->queue_size, idx);
+ goto out_destroy_mr_pool;
+ }
+ }
+
set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
return 0;
+out_destroy_mr_pool:
+ ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
out_destroy_ring:
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -513,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
queue = &ctrl->queues[idx];
queue->ctrl = ctrl;
+ if (idx && ctrl->ctrl.max_integrity_segments)
+ queue->pi_support = true;
+ else
+ queue->pi_support = false;
init_completion(&queue->cm_done);
if (idx > 0)
@@ -723,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+ NVME_RDMA_DATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
@@ -737,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+ NVME_RDMA_DATA_SGL_SIZE;
+ if (nctrl->max_integrity_segments)
+ set->cmd_size += sizeof(struct nvme_rdma_sgl) +
+ NVME_RDMA_METADATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
@@ -770,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool new)
{
+ bool pi_capable = false;
int error;
error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
@@ -779,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->device = ctrl->queues[0].device;
ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
- ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
+ /* T10-PI support */
+ if (ctrl->device->dev->attrs.device_cap_flags &
+ IB_DEVICE_INTEGRITY_HANDOVER)
+ pi_capable = true;
+
+ ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
+ pi_capable);
/*
* Bind the async event SQE DMA mapping to the admin queue lifetime.
@@ -821,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->ctrl.max_segments = ctrl->max_fr_pages;
ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+ if (pi_capable)
+ ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
+ else
+ ctrl->ctrl.max_integrity_segments = 0;
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -1149,17 +1208,29 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
+ struct list_head *pool = &queue->qp->rdma_mrs;
if (!blk_rq_nr_phys_segments(rq))
return;
+ if (blk_integrity_rq(rq)) {
+ ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents, rq_dma_dir(rq));
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
+ }
+
+ if (req->use_sig_mr)
+ pool = &queue->qp->sig_mrs;
+
if (req->mr) {
- ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+ ib_mr_pool_put(queue->qp, pool, req->mr);
req->mr = NULL;
}
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
- sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1178,7 +1249,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
int count)
{
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
- struct scatterlist *sgl = req->sg_table.sgl;
+ struct scatterlist *sgl = req->data_sgl.sg_table.sgl;
struct ib_sge *sge = &req->sge[1];
u32 len = 0;
int i;
@@ -1203,8 +1274,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
{
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
- sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl));
- put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length);
+ sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
+ put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
return 0;
@@ -1225,7 +1296,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
* Align the MR to a 4K page size to match the ctrl page size and
* the block virtual boundary.
*/
- nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
+ nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
+ SZ_4K);
if (unlikely(nr < count)) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
req->mr = NULL;
@@ -1256,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
return 0;
}
+static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_domain *domain,
+ u16 control, u8 pi_type)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+ domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ domain->sig.dif.ref_remap = true;
+
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_escape = true;
+ if (pi_type == NVME_NS_DPS_PI_TYPE3)
+ domain->sig.dif.ref_escape = true;
+}
+
+static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
+ u8 pi_type)
+{
+ u16 control = le16_to_cpu(cmd->rw.control);
+
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+ if (control & NVME_RW_PRINFO_PRACT) {
+ /* for WRITE_INSERT/READ_STRIP no memory domain */
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ /* Clear the PRACT bit since HCA will generate/verify the PI */
+ control &= ~NVME_RW_PRINFO_PRACT;
+ cmd->rw.control = cpu_to_le16(control);
+ } else {
+ /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ }
+}
+
+static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
+{
+ *mask = 0;
+ if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF)
+ *mask |= IB_SIG_CHECK_REFTAG;
+ if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD)
+ *mask |= IB_SIG_CHECK_GUARD;
+}
+
+static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ nvme_rdma_wr_error(cq, wc, "SIG");
+}
+
+static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
+ struct nvme_rdma_request *req, struct nvme_command *c,
+ int count, int pi_count)
+{
+ struct nvme_rdma_sgl *sgl = &req->data_sgl;
+ struct ib_reg_wr *wr = &req->reg_wr;
+ struct request *rq = blk_mq_rq_from_pdu(req);
+ struct nvme_ns *ns = rq->q->queuedata;
+ struct bio *bio = rq->bio;
+ struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+ int nr;
+
+ req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
+ if (WARN_ON_ONCE(!req->mr))
+ return -EAGAIN;
+
+ nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
+ req->metadata_sgl->sg_table.sgl, pi_count, NULL,
+ SZ_4K);
+ if (unlikely(nr))
+ goto mr_put;
+
+ nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
+ req->mr->sig_attrs, ns->pi_type);
+ nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
+
+ ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
+
+ req->reg_cqe.done = nvme_rdma_sig_done;
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
+ wr->wr.wr_cqe = &req->reg_cqe;
+ wr->wr.num_sge = 0;
+ wr->wr.send_flags = 0;
+ wr->mr = req->mr;
+ wr->key = req->mr->rkey;
+ wr->access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
+
+ sg->addr = cpu_to_le64(req->mr->iova);
+ put_unaligned_le24(req->mr->length, sg->length);
+ put_unaligned_le32(req->mr->rkey, sg->key);
+ sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
+
+ return 0;
+
+mr_put:
+ ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
+ req->mr = NULL;
+ if (nr < 0)
+ return nr;
+ return -EINVAL;
+}
+
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
+ int pi_count = 0;
int count, ret;
req->num_sge = 1;
@@ -1272,22 +1457,52 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
if (!blk_rq_nr_phys_segments(rq))
return nvme_rdma_set_sg_null(c);
- req->sg_table.sgl = req->first_sgl;
- ret = sg_alloc_table_chained(&req->sg_table,
- blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
+ req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
+ ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
+ blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
- req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
+ req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
+ req->data_sgl.sg_table.sgl);
- count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
- rq_dma_dir(rq));
+ count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
+ req->data_sgl.nents, rq_dma_dir(rq));
if (unlikely(count <= 0)) {
ret = -EIO;
goto out_free_table;
}
+ if (blk_integrity_rq(rq)) {
+ req->metadata_sgl->sg_table.sgl =
+ (struct scatterlist *)(req->metadata_sgl + 1);
+ ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
+ blk_rq_count_integrity_sg(rq->q, rq->bio),
+ req->metadata_sgl->sg_table.sgl,
+ NVME_INLINE_METADATA_SG_CNT);
+ if (unlikely(ret)) {
+ ret = -ENOMEM;
+ goto out_unmap_sg;
+ }
+
+ req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
+ rq->bio, req->metadata_sgl->sg_table.sgl);
+ pi_count = ib_dma_map_sg(ibdev,
+ req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents,
+ rq_dma_dir(rq));
+ if (unlikely(pi_count <= 0)) {
+ ret = -EIO;
+ goto out_free_pi_table;
+ }
+ }
+
+ if (req->use_sig_mr) {
+ ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
+ goto out;
+ }
+
if (count <= dev->num_inline_segments) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
queue->ctrl->use_inline_data &&
@@ -1306,14 +1521,23 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
- goto out_unmap_sg;
+ goto out_unmap_pi_sg;
return 0;
+out_unmap_pi_sg:
+ if (blk_integrity_rq(rq))
+ ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents, rq_dma_dir(rq));
+out_free_pi_table:
+ if (blk_integrity_rq(rq))
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
out_unmap_sg:
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
out_free_table:
- sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
return ret;
}
@@ -1761,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ queue->pi_support &&
+ (c->common.opcode == nvme_cmd_write ||
+ c->common.opcode == nvme_cmd_read) &&
+ nvme_ns_has_pi(ns))
+ req->use_sig_mr = true;
+ else
+ req->use_sig_mr = false;
+
err = nvme_rdma_map_data(queue, rq, c);
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
@@ -1801,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
return ib_process_cq_direct(queue->ib_cq, -1);
}
+/*
+ * Check the signature status of the request's MR and, when a protection
+ * information error is reported, store the matching NVMe status code in
+ * the request.  A failure of the status query itself is reported as
+ * NVME_SC_INVALID_PI.
+ */
+static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
+{
+	struct request *rq = blk_mq_rq_from_pdu(req);
+	struct ib_mr_status mr_status;
+	int ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS,
+				     &mr_status);
+
+	if (ret) {
+		pr_err("ib_check_mr_status failed, ret %d\n", ret);
+		nvme_req(rq)->status = NVME_SC_INVALID_PI;
+		return;
+	}
+
+	/* No signature failure flagged: leave the request status untouched. */
+	if (!(mr_status.fail_status & IB_MR_CHECK_SIG_STATUS))
+		return;
+
+	/* Translate the verbs error type into the NVMe status code. */
+	if (mr_status.sig_err.err_type == IB_SIG_BAD_GUARD)
+		nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
+	else if (mr_status.sig_err.err_type == IB_SIG_BAD_REFTAG)
+		nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
+	else if (mr_status.sig_err.err_type == IB_SIG_BAD_APPTAG)
+		nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
+
+	pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+	       mr_status.sig_err.err_type, mr_status.sig_err.expected,
+	       mr_status.sig_err.actual);
+}
+
static void nvme_rdma_complete_rq(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct ib_device *ibdev = queue->device->dev;
+ if (req->use_sig_mr)
+ nvme_rdma_check_pi_status(req);
+
nvme_rdma_unmap_data(queue, rq);
ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@@ -1926,7 +2193,7 @@ out_fail:
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.name = "rdma",
.module = THIS_MODULE,
- .flags = NVME_F_FABRICS,
+ .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c79e248b9f43..7c7c1886642f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -885,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
if (last && !queue->data_digest)
flags |= MSG_EOR;
else
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
/* can't zcopy slab pages */
if (unlikely(PageSlab(page))) {
@@ -924,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
bool inline_data = nvme_tcp_has_inline_data(req);
- int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) + hdgst - req->offset;
+ int flags = MSG_DONTWAIT;
int ret;
+ if (inline_data)
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ else
+ flags |= MSG_EOR;
+
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
@@ -967,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE);
+ MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (unlikely(ret <= 0))
return ret;
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index d7f48c0fb311..4474952d64c6 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -4,6 +4,7 @@ config NVME_TARGET
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
+ select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
This enabled target side support for the NVMe protocol, that is
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 4c79aa804887..1db8c0498668 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -295,7 +295,7 @@ out:
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
+ if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
return;
switch (req->cmd->get_log_page.lid) {
@@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
+ u32 cmd_capsule_size;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
- /* Max command capsule size is sqe + single page of in-capsule data */
- id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
- req->port->inline_data_size) / 16);
+ /*
+ * Max command capsule size is sqe + in-capsule data size.
+ * Disable in-capsule data for Metadata capable controllers.
+ */
+ cmd_capsule_size = sizeof(struct nvme_command);
+ if (!ctrl->pi_support)
+ cmd_capsule_size += req->port->inline_data_size;
+ id->ioccsz = cpu_to_le32(cmd_capsule_size / 16);
+
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
@@ -465,6 +472,7 @@ out:
static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
struct nvme_id_ns *id;
u16 status = 0;
@@ -482,14 +490,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
}
/* return an all zeroed buffer if we can't find an active namespace */
- ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+ ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
if (!ns)
goto done;
- if (ns->bdev)
- nvmet_bdev_ns_revalidate(ns);
- else
- nvmet_file_ns_revalidate(ns);
+ nvmet_ns_revalidate(ns);
/*
* nuse = ncap = nsze isn't always true, but we have no way to find
@@ -526,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->lbaf[0].ds = ns->blksize_shift;
+ if (ctrl->pi_support && nvmet_ns_has_pi(ns)) {
+ id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST |
+ NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 |
+ NVME_NS_DPC_PI_TYPE3;
+ id->mc = NVME_MC_EXTENDED_LBA;
+ id->dps = ns->pi_type;
+ id->flbas = NVME_NS_FLBAS_META_EXT;
+ id->lbaf[0].ms = cpu_to_le16(ns->metadata_size);
+ }
+
if (ns->readonly)
id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
@@ -630,7 +645,7 @@ out:
static void nvmet_execute_identify(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
switch (req->cmd->identify.cns) {
@@ -659,7 +674,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
@@ -748,7 +763,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u16 nsqr;
u16 ncqr;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@@ -820,7 +835,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
- if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10)))
+ if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10)))
return;
switch (cdw10 & 0xff) {
@@ -887,7 +902,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
mutex_lock(&ctrl->lock);
@@ -906,7 +921,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ae8fb4489a10..419e0d4ce79b 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -248,6 +248,36 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+/* configfs show handler: print the port's pi_enable flag followed by '\n'. */
+static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
+		char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%d\n",
+			to_nvmet_port(item)->pi_enable);
+}
+
+/*
+ * configfs store handler for the port's pi_enable flag.
+ *
+ * Returns @count on success, -EINVAL when @page is not a valid boolean
+ * and -EACCES when the port is currently enabled.
+ */
+static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	bool enable;
+
+	/* Input is validated first so a bad value always yields -EINVAL. */
+	if (strtobool(page, &enable) != 0)
+		return -EINVAL;
+
+	if (!port->enabled) {
+		port->pi_enable = enable;
+		return count;
+	}
+
+	pr_err("Disable port before setting pi_enable value.\n");
+	return -EACCES;
+}
+
+CONFIGFS_ATTR(nvmet_, param_pi_enable);
+#endif
+
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
@@ -324,7 +354,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
kfree(ns->device_path);
ret = -ENOMEM;
- ns->device_path = kstrndup(page, len, GFP_KERNEL);
+ ns->device_path = kmemdup_nul(page, len, GFP_KERNEL);
if (!ns->device_path)
goto out_unlock;
@@ -540,6 +570,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, buffered_io);
+/*
+ * Write-only configfs trigger: writing a true boolean revalidates the
+ * namespace under the subsystem lock.
+ *
+ * Returns @count on success, -EINVAL when the input is not a boolean, is
+ * false, or the namespace is not enabled.
+ */
+static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+	ssize_t ret = count;
+	bool trigger;
+
+	if (strtobool(page, &trigger) || !trigger)
+		return -EINVAL;
+
+	mutex_lock(&ns->subsys->lock);
+	if (ns->enabled) {
+		nvmet_ns_revalidate(ns);
+	} else {
+		pr_err("enable ns before revalidate.\n");
+		ret = -EINVAL;
+	}
+	mutex_unlock(&ns->subsys->lock);
+
+	return ret;
+}
+
+CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
+
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@@ -547,6 +602,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
+ &nvmet_ns_attr_revalidate_size,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
@@ -960,7 +1016,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
return -EINVAL;
}
- new_model_number = kstrndup(page, len, GFP_KERNEL);
+ new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
if (!new_model_number)
return -ENOMEM;
@@ -984,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+/* configfs show handler: print the subsystem's pi_support flag. */
+static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	return snprintf(page, PAGE_SIZE, "%d\n", subsys->pi_support);
+}
+
+/*
+ * configfs store handler: parse a boolean from @page into the subsystem's
+ * pi_support flag.  Returns @count on success, -EINVAL on invalid input.
+ */
+static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	bool enable;
+
+	if (strtobool(page, &enable) != 0)
+		return -EINVAL;
+
+	to_subsys(item)->pi_support = enable;
+	return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
+#endif
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
@@ -991,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_subsys_attr_attr_pi_enable,
+#endif
NULL,
};
@@ -1146,7 +1227,7 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
-struct nvmet_type_name_map nvmet_ana_state[] = {
+static struct nvmet_type_name_map nvmet_ana_state[] = {
{ NVME_ANA_OPTIMIZED, "optimized" },
{ NVME_ANA_NONOPTIMIZED, "non-optimized" },
{ NVME_ANA_INACCESSIBLE, "inaccessible" },
@@ -1290,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_attr_param_pi_enable,
+#endif
NULL,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b685f99d56a1..6392bcd30bd7 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
struct nvmet_async_event *aen;
struct nvmet_req *req;
- while (1) {
- mutex_lock(&ctrl->lock);
- aen = list_first_entry_or_null(&ctrl->async_events,
- struct nvmet_async_event, entry);
- if (!aen || !ctrl->nr_async_event_cmds) {
- mutex_unlock(&ctrl->lock);
- break;
- }
-
+ mutex_lock(&ctrl->lock);
+ while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
+ aen = list_first_entry(&ctrl->async_events,
+ struct nvmet_async_event, entry);
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
if (status == 0)
nvmet_set_result(req, nvmet_async_event_result(aen));
@@ -151,20 +146,21 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
kfree(aen);
mutex_unlock(&ctrl->lock);
+ trace_nvmet_async_event(ctrl, req->cqe->result.u32);
nvmet_req_complete(req, status);
+ mutex_lock(&ctrl->lock);
}
+ mutex_unlock(&ctrl->lock);
}
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
- struct nvmet_req *req;
+ struct nvmet_async_event *aen, *tmp;
mutex_lock(&ctrl->lock);
- while (ctrl->nr_async_event_cmds) {
- req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
- mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
- mutex_lock(&ctrl->lock);
+ list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
+ list_del(&aen->entry);
+ kfree(aen);
}
mutex_unlock(&ctrl->lock);
}
@@ -322,12 +318,21 @@ int nvmet_enable_port(struct nvmet_port *port)
if (!try_module_get(ops->owner))
return -EINVAL;
- ret = ops->add_port(port);
- if (ret) {
- module_put(ops->owner);
- return ret;
+ /*
+ * If the user requested PI support and the transport isn't pi capable,
+ * don't enable the port.
+ */
+ if (port->pi_enable && !ops->metadata_support) {
+ pr_err("T10-PI is not supported by transport type %d\n",
+ port->disc_addr.trtype);
+ ret = -EINVAL;
+ goto out_put;
}
+ ret = ops->add_port(port);
+ if (ret)
+ goto out_put;
+
/* If the transport didn't set inline_data_size, then disable it. */
if (port->inline_data_size < 0)
port->inline_data_size = 0;
@@ -335,6 +340,10 @@ int nvmet_enable_port(struct nvmet_port *port)
port->enabled = true;
port->tr_ops = ops;
return 0;
+
+out_put:
+ module_put(ops->owner);
+ return ret;
}
void nvmet_disable_port(struct nvmet_port *port)
@@ -514,6 +523,19 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
ns->nsid);
}
+/*
+ * Revalidate a namespace through the bdev or file backend and raise a
+ * namespace-changed notification when its size differs afterwards.
+ */
+void nvmet_ns_revalidate(struct nvmet_ns *ns)
+{
+	const loff_t prev_size = ns->size;
+
+	if (!ns->bdev)
+		nvmet_file_ns_revalidate(ns);
+	else
+		nvmet_bdev_ns_revalidate(ns);
+
+	/* Size unchanged: nothing to report. */
+	if (ns->size == prev_size)
+		return;
+
+	nvmet_ns_changed(ns->subsys, ns->nsid);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
@@ -764,10 +786,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
- if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
+ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
nvmet_async_events_process(ctrl, status);
- nvmet_async_events_free(ctrl);
- }
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
@@ -873,8 +893,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->sq = sq;
req->ops = ops;
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
req->transfer_len = 0;
+ req->metadata_len = 0;
req->cqe->status = 0;
req->cqe->sq_head = 0;
req->ns = NULL;
@@ -936,9 +959,9 @@ void nvmet_req_uninit(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
-bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
- if (unlikely(data_len != req->transfer_len)) {
+ if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
return false;
@@ -946,7 +969,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
return true;
}
-EXPORT_SYMBOL_GPL(nvmet_check_data_len);
+EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
@@ -959,50 +982,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
return true;
}
-int nvmet_req_alloc_sgl(struct nvmet_req *req)
+static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
- struct pci_dev *p2p_dev = NULL;
+ return req->transfer_len - req->metadata_len;
+}
- if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
- if (req->sq->ctrl && req->ns)
- p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
- req->ns->nsid);
+static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req)
+{
+ req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt,
+ nvmet_data_transfer_len(req));
+ if (!req->sg)
+ goto out_err;
- req->p2p_dev = NULL;
- if (req->sq->qid && p2p_dev) {
- req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
- req->transfer_len);
- if (req->sg) {
- req->p2p_dev = p2p_dev;
- return 0;
- }
- }
+ if (req->metadata_len) {
+ req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev,
+ &req->metadata_sg_cnt, req->metadata_len);
+ if (!req->metadata_sg)
+ goto out_free_sg;
+ }
+ return 0;
+out_free_sg:
+ pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
+out_err:
+ return -ENOMEM;
+}
- /*
- * If no P2P memory was available we fallback to using
- * regular memory
- */
+static bool nvmet_req_find_p2p_dev(struct nvmet_req *req)
+{
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA))
+ return false;
+
+ if (req->sq->ctrl && req->sq->qid && req->ns) {
+ req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
+ req->ns->nsid);
+ if (req->p2p_dev)
+ return true;
}
- req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+ req->p2p_dev = NULL;
+ return false;
+}
+
+/*
+ * Allocate the data SGL and, when the request carries metadata, the
+ * metadata SGL.  P2P memory is tried first when a p2p device is mapped
+ * for the namespace; otherwise, or when that allocation fails, regular
+ * memory is used.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int nvmet_req_alloc_sgls(struct nvmet_req *req)
+{
+	if (nvmet_req_find_p2p_dev(req)) {
+		if (!nvmet_req_alloc_p2pmem_sgls(req))
+			return 0;
+
+		/*
+		 * No P2P memory was available, so fall back to regular
+		 * memory.  Clear p2p_dev, otherwise nvmet_req_free_sgls()
+		 * would hand these regular SGLs to pci_p2pmem_free_sgl().
+		 */
+		req->p2p_dev = NULL;
+	}
+
+	req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
+			    &req->sg_cnt);
 	if (unlikely(!req->sg))
-		return -ENOMEM;
+		goto out;
+
+	if (req->metadata_len) {
+		req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
+					     &req->metadata_sg_cnt);
+		if (unlikely(!req->metadata_sg))
+			goto out_free;
+	}
 	return 0;
+out_free:
+	sgl_free(req->sg);
+out:
+	return -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
+EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
-void nvmet_req_free_sgl(struct nvmet_req *req)
+void nvmet_req_free_sgls(struct nvmet_req *req)
{
- if (req->p2p_dev)
+ if (req->p2p_dev) {
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
- else
+ if (req->metadata_sg)
+ pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
+ } else {
sgl_free(req->sg);
+ if (req->metadata_sg)
+ sgl_free(req->metadata_sg);
+ }
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
}
-EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
static inline bool nvmet_cc_en(u32 cc)
{
@@ -1357,6 +1420,7 @@ static void nvmet_ctrl_free(struct kref *ref)
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
+ nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
kfree(ctrl->cqs);
kfree(ctrl->changed_ns_list);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 0c2274b21e15..40cf0b6e6c9d 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
u16 status = 0;
void *buffer;
- if (!nvmet_check_data_len(req, data_len))
+ if (!nvmet_check_transfer_len(req, data_len))
return;
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
@@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
const char model[] = "Linux";
u16 status = 0;
- if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
@@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index feef15c38ec9..42bd12b8bf00 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_set.attrib & 1) {
@@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
u16 status = 0;
u64 val = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_get.attrib & 1) {
@@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
- if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
@@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
+ ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
+
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
- pr_info("creating controller %d for subsystem %s for NQN %s.\n",
- ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
+ pr_info("creating controller %d for subsystem %s for NQN %s%s.\n",
+ ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
+ ctrl->pi_support ? " T10-PI is enabled" : "");
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@@ -223,7 +226,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
- if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 0427e040e3dd..07055f7ac398 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
+/*
+ * Derive the namespace metadata size and PI type from the integrity
+ * profile of the backing block device.  Only the T10 type 1 and type 3
+ * CRC profiles are accepted; any other profile leaves metadata disabled.
+ */
+static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
+{
+	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
+
+	/* No integrity profile: keep the values set by the caller. */
+	if (!bi)
+		return;
+
+	ns->metadata_size = bi->tuple_size;
+	if (bi->profile == &t10_pi_type1_crc) {
+		ns->pi_type = NVME_NS_DPS_PI_TYPE1;
+	} else if (bi->profile == &t10_pi_type3_crc) {
+		ns->pi_type = NVME_NS_DPS_PI_TYPE3;
+	} else {
+		/* Unsupported metadata type */
+		ns->metadata_size = 0;
+	}
+}
+
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
@@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
}
ns->size = i_size_read(ns->bdev->bd_inode);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+ ns->pi_type = 0;
+ ns->metadata_size = 0;
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
+ nvmet_bdev_ns_enable_integrity(ns);
+
return 0;
}
@@ -147,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio)
bio_put(bio);
}
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+/*
+ * Attach an integrity payload to @bio and fill it with pages taken from
+ * the metadata scatterlist iterator @miter.  The iterator is stopped
+ * before returning on every path that started consuming it.
+ *
+ * Returns 0 on success, -ENODEV when the backing device has no integrity
+ * profile, the error from bio_integrity_alloc() when the payload cannot
+ * be allocated, or -ENOMEM when a page cannot be added to the payload.
+ */
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+				struct sg_mapping_iter *miter)
+{
+	struct blk_integrity *bi = bdev_get_integrity(req->ns->bdev);
+	struct bio_integrity_payload *bip;
+	size_t remaining, chunk;
+	int ret = 0;
+	int added;
+
+	if (unlikely(!bi)) {
+		pr_err("Unable to locate bio_integrity\n");
+		return -ENODEV;
+	}
+
+	bip = bio_integrity_alloc(bio, GFP_NOIO,
+			min_t(unsigned int, req->metadata_sg_cnt,
+			      BIO_MAX_PAGES));
+	if (IS_ERR(bip)) {
+		pr_err("Unable to allocate bio_integrity_payload\n");
+		return PTR_ERR(bip);
+	}
+
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
+	/* virtual start sector must be in integrity interval units */
+	bip_set_seed(bip, bio->bi_iter.bi_sector >>
+		     (bi->interval_exp - SECTOR_SHIFT));
+
+	remaining = bip->bip_iter.bi_size;
+	while (remaining > 0 && sg_miter_next(miter)) {
+		chunk = min_t(size_t, miter->length, remaining);
+		added = bio_integrity_add_page(bio, miter->page, chunk,
+					       offset_in_page(miter->addr));
+		if (unlikely(added != chunk)) {
+			pr_err("bio_integrity_add_page() failed; %d\n", added);
+			ret = -ENOMEM;
+			break;
+		}
+
+		/*
+		 * Only part of this segment was used: lower 'consumed' by
+		 * the unused remainder.
+		 */
+		if (chunk < miter->length)
+			miter->consumed -= miter->length - chunk;
+		remaining -= chunk;
+	}
+	sg_miter_stop(miter);
+
+	return ret;
+}
+#else
+/* Stub used when CONFIG_BLK_DEV_INTEGRITY is disabled: always fails. */
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+				struct sg_mapping_iter *miter)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
@@ -154,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
struct scatterlist *sg;
struct blk_plug plug;
sector_t sector;
- int op, i;
+ int op, i, rc;
+ struct sg_mapping_iter prot_miter;
+ unsigned int iter_flags;
+ unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
- if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ if (!nvmet_check_transfer_len(req, total_len))
return;
if (!req->sg_cnt) {
@@ -168,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
op |= REQ_FUA;
+ iter_flags = SG_MITER_TO_SG;
} else {
op = REQ_OP_READ;
+ iter_flags = SG_MITER_FROM_SG;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
@@ -191,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_opf = op;
blk_start_plug(&plug);
+ if (req->metadata_len)
+ sg_miter_start(&prot_miter, req->metadata_sg,
+ req->metadata_sg_cnt, iter_flags);
+
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
struct bio *prev = bio;
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio,
+ &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
@@ -209,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
sg_cnt--;
}
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
submit_bio(bio);
blk_finish_plug(&plug);
}
@@ -217,7 +320,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
@@ -309,7 +412,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
sector_t nr_sector;
int ret;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
sector = le64_to_cpu(write_zeroes->slba) <<
@@ -336,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
+ if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
+ req->metadata_len = nvmet_rw_metadata_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index f0bd08d86ac0..0abbefd9925e 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -241,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
{
ssize_t nr_bvec = req->sg_cnt;
- if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
return;
if (!req->sg_cnt || !nr_bvec) {
@@ -285,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w)
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
@@ -375,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 3d981eb6e100..809691291e73 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -19,6 +19,7 @@
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
+#include <linux/t10-pi.h>
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@@ -77,6 +78,8 @@ struct nvmet_ns {
int use_p2pmem;
struct pci_dev *p2p_dev;
+ int pi_type;
+ int metadata_size;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -142,6 +145,7 @@ struct nvmet_port {
bool enabled;
int inline_data_size;
const struct nvmet_fabrics_ops *tr_ops;
+ bool pi_enable;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -201,6 +205,7 @@ struct nvmet_ctrl {
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
+ bool pi_support;
};
struct nvmet_subsys_model {
@@ -230,6 +235,7 @@ struct nvmet_subsys {
u64 ver;
u64 serial;
char *subsysnqn;
+ bool pi_support;
struct config_group group;
@@ -281,6 +287,7 @@ struct nvmet_fabrics_ops {
unsigned int type;
unsigned int msdbd;
bool has_keyed_sgls : 1;
+ bool metadata_support : 1;
void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
@@ -302,6 +309,7 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
+ struct scatterlist *metadata_sg;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
@@ -315,8 +323,10 @@ struct nvmet_req {
} f;
};
int sg_cnt;
+ int metadata_sg_cnt;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
+ size_t metadata_len;
struct nvmet_port *port;
@@ -384,11 +394,11 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
-bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
-int nvmet_req_alloc_sgl(struct nvmet_req *req);
-void nvmet_req_free_sgl(struct nvmet_req *req);
+int nvmet_req_alloc_sgls(struct nvmet_req *req);
+void nvmet_req_free_sgls(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
@@ -500,13 +510,22 @@ u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
+void nvmet_ns_revalidate(struct nvmet_ns *ns);
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
+/*
+ * Data length of a read/write command: the little-endian rw.length field is
+ * converted to CPU order, incremented by one, and shifted left by the
+ * namespace's blksize_shift.
+ */
+static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
+/*
+ * Metadata length of a read/write command: (rw.length + 1) multiplied by the
+ * namespace's metadata_size. Always 0 when CONFIG_BLK_DEV_INTEGRITY is not
+ * enabled.
+ */
+static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return 0;
+ return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) *
+ req->ns->metadata_size;
+}
+
static inline u32 nvmet_dsm_len(struct nvmet_req *req)
{
return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
@@ -521,4 +540,11 @@ static inline __le16 to0based(u32 a)
return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
}
+/*
+ * Returns true when the namespace has a non-zero pi_type and its
+ * metadata_size is exactly sizeof(struct t10_pi_tuple). Always false when
+ * CONFIG_BLK_DEV_INTEGRITY is not enabled.
+ */
+static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return false;
+ return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple);
+}
+
#endif /* _NVMET_H */
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 7a90b10359bb..d5141780592e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -33,6 +33,7 @@
/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS 8
+#define NVMET_RDMA_MAX_METADATA_MDTS 5
struct nvmet_rdma_srq;
@@ -60,6 +61,7 @@ struct nvmet_rdma_rsp {
struct nvmet_rdma_queue *queue;
struct ib_cqe read_cqe;
+ struct ib_cqe write_cqe;
struct rdma_rw_ctx rw;
struct nvmet_req req;
@@ -161,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
@@ -423,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
/* Data In / RDMA READ */
r->read_cqe.done = nvmet_rdma_read_data_done;
+ /* Data Out / RDMA WRITE */
+ r->write_cqe.done = nvmet_rdma_write_data_done;
+
return 0;
out_free_rsp:
@@ -532,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
spin_unlock(&queue->rsp_wr_wait_lock);
}
+/*
+ * Query the signature status of @sig_mr and translate it into an NVMe status
+ * code.
+ *
+ * Returns 0 when no signature failure is flagged, NVME_SC_INVALID_PI when the
+ * status query itself fails, or the guard/reftag/apptag check status matching
+ * the reported error type.
+ */
+static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
+{
+ struct ib_mr_status mr_status;
+ int ret;
+ u16 status = 0;
+
+ ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ return NVME_SC_INVALID_PI;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ /*
+ * No default case: an err_type other than the three below leaves
+ * status at 0, although the error is still logged.
+ */
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ status = NVME_SC_GUARD_CHECK;
+ break;
+ case IB_SIG_BAD_REFTAG:
+ status = NVME_SC_REFTAG_CHECK;
+ break;
+ case IB_SIG_BAD_APPTAG:
+ status = NVME_SC_APPTAG_CHECK;
+ break;
+ }
+ pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+ mr_status.sig_err.err_type,
+ mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+ }
+
+ return status;
+}
+
+/*
+ * Fill one signature domain as T10-DIF with a CRC block guard.
+ *
+ * @bi:      block integrity profile; supplies the protection interval
+ * @cmd:     NVMe command; supplies reftag, apptag and appmask (rw fields)
+ * @domain:  domain to fill; fields not assigned here are left as the caller
+ *           provided them (nvmet_rdma_set_sig_attrs zeroes the whole
+ *           ib_sig_attrs before calling)
+ * @control: rw.control already converted to CPU byte order
+ * @pi_type: namespace protection information type
+ */
+static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_domain *domain,
+ u16 control, u8 pi_type)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+ domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
+ /* ref_remap is only enabled when the command asks for a reftag check */
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ domain->sig.dif.ref_remap = true;
+
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_escape = true;
+ /* ref_escape is only enabled for Type 3 protection */
+ if (pi_type == NVME_NS_DPS_PI_TYPE3)
+ domain->sig.dif.ref_escape = true;
+}
+
+/*
+ * Build the signature attributes for a request from its read/write command.
+ *
+ * @sig_attrs is zeroed first, then the wire and memory domains are set up
+ * according to the PRACT bit and check_mask is derived from the PRCHK bits.
+ *
+ * Side effects when PRACT is set: the PRACT bit is cleared in
+ * req->cmd->rw.control and req->transfer_len grows by req->metadata_len.
+ */
+static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct nvme_command *cmd = req->cmd;
+ u16 control = le16_to_cpu(cmd->rw.control);
+ u8 pi_type = req->ns->pi_type;
+ struct blk_integrity *bi;
+
+ /*
+ * NOTE(review): bi is not checked for NULL here and is dereferenced by
+ * nvmet_rdma_set_sig_domain() - presumably callers only get here for
+ * namespaces with an integrity profile; confirm.
+ */
+ bi = bdev_get_integrity(req->ns->bdev);
+
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+
+ if (control & NVME_RW_PRINFO_PRACT) {
+ /* for WRITE_INSERT/READ_STRIP no wire domain */
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ /* Clear the PRACT bit since HCA will generate/verify the PI */
+ control &= ~NVME_RW_PRINFO_PRACT;
+ cmd->rw.control = cpu_to_le16(control);
+ /* PI is added by the HW */
+ req->transfer_len += req->metadata_len;
+ } else {
+ /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ }
+
+ /* Translate the command's PRCHK bits into the verbs check mask */
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
+ if (control & NVME_RW_PRINFO_PRCHK_GUARD)
+ sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
+ if (control & NVME_RW_PRINFO_PRCHK_APP)
+ sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
+}
+
+/*
+ * Initialize rsp->rw for the remote buffer described by @addr/@key.
+ *
+ * Requests with a non-zero metadata_len use the signature variant, which
+ * takes both the data and the metadata scatterlists plus @sig_attrs; all
+ * other requests use the plain variant and @sig_attrs is not touched.
+ *
+ * The return value of the selected rdma_rw init call is passed through
+ * unchanged.
+ */
+static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+ int ret;
+
+ if (req->metadata_len)
+ ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
+ addr, key, nvmet_data_dir(req));
+ else
+ ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, 0, addr, key,
+ nvmet_data_dir(req));
+
+ return ret;
+}
+
+/*
+ * Tear down rsp->rw. The choice between the signature and the plain destroy
+ * call uses the same req->metadata_len test as nvmet_rdma_rw_ctx_init(), so
+ * the destroy variant matches the one used at init time.
+ */
+static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+
+ if (req->metadata_len)
+ rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt,
+ nvmet_data_dir(req));
+ else
+ rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, nvmet_data_dir(req));
+}
static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
@@ -539,14 +668,11 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
- if (rsp->n_rdma) {
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
- }
+ if (rsp->n_rdma)
+ nvmet_rdma_rw_ctx_destroy(rsp);
if (rsp->req.sg != rsp->cmd->inline_sg)
- nvmet_req_free_sgl(&rsp->req);
+ nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -598,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_wr.opcode = IB_WR_SEND;
}
- if (nvmet_rdma_need_data_out(rsp))
- first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
- cm_id->port_num, NULL, &rsp->send_wr);
- else
+ if (nvmet_rdma_need_data_out(rsp)) {
+ if (rsp->req.metadata_len)
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, &rsp->write_cqe, NULL);
+ else
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, NULL, &rsp->send_wr);
+ } else {
first_wr = &rsp->send_wr;
+ }
nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
@@ -621,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
+ u16 status = 0;
WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -640,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
- rsp->req.execute(&rsp->req);
+ if (rsp->req.metadata_len)
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ if (unlikely(status))
+ nvmet_req_complete(&rsp->req, status);
+ else
+ rsp->req.execute(&rsp->req);
+}
+
+/*
+ * Completion handler for the RDMA WRITE posted through rsp->write_cqe (used
+ * by nvmet_rdma_queue_response() for data-out requests that carry metadata).
+ *
+ * On a failed work completion the request is torn down and, unless the
+ * failure is a flush error, the queue error path is invoked. On success the
+ * signature status is checked, recorded in the NVMe completion entry if it
+ * is non-zero, and the response send is posted.
+ */
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct nvmet_rdma_rsp *rsp =
+ container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
+ struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ u16 status;
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return;
+
+ /* Give the send queue slots used by the RDMA operation back */
+ WARN_ON(rsp->n_rdma <= 0);
+ atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
+ rsp->n_rdma = 0;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
+ nvmet_req_uninit(&rsp->req);
+ nvmet_rdma_release_rsp(rsp);
+ /* Flush errors are not logged and do not trigger error_comp */
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n",
+ wc->wr_cqe, ib_wc_status_msg(wc->status),
+ wc->status);
+ nvmet_rdma_error_comp(queue);
+ }
+ return;
+ }
+
+ /*
+ * Upon RDMA completion check the signature status
+ * - if succeeded send good NVMe response
+ * - if failed send bad NVMe response with appropriate error
+ */
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ if (unlikely(status))
+ rsp->req.cqe->status = cpu_to_le16(status << 1);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ /* The response send was not chained to the RDMA WRITE; post it now */
+ if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
+ pr_err("sending cmd response failed\n");
+ nvmet_rdma_release_rsp(rsp);
+ }
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@@ -697,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
struct nvme_keyed_sgl_desc *sgl, bool invalidate)
{
- struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
u32 key = get_unaligned_le32(sgl->key);
+ struct ib_sig_attrs sig_attrs;
int ret;
rsp->req.transfer_len = get_unaligned_le24(sgl->length);
@@ -708,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!rsp->req.transfer_len)
return 0;
- ret = nvmet_req_alloc_sgl(&rsp->req);
+ if (rsp->req.metadata_len)
+ nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
+
+ ret = nvmet_req_alloc_sgls(&rsp->req);
if (unlikely(ret < 0))
goto error_out;
- ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
- rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
- nvmet_data_dir(&rsp->req));
+ ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@@ -1108,6 +1290,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
+ if (queue->port->pi_enable && queue->host_qid)
+ qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
+
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
if (ret) {
pr_err("failed to create_qp ret= %d\n", ret);
@@ -1226,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
+ struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_queue *queue;
int ret;
@@ -1252,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
queue->dev = ndev;
queue->cm_id = cm_id;
+ queue->port = port->nport;
spin_lock_init(&queue->state_lock);
queue->state = NVMET_RDMA_Q_CONNECTING;
@@ -1369,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
- struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_device *ndev;
struct nvmet_rdma_queue *queue;
int ret = -EINVAL;
@@ -1385,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = -ENOMEM;
goto put_device;
}
- queue->port = port->nport;
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
@@ -1657,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
goto out_destroy_id;
}
+ if (port->nport->pi_enable &&
+ !(cm_id->device->attrs.device_cap_flags &
+ IB_DEVICE_INTEGRITY_HANDOVER)) {
+ pr_err("T10-PI is not supported for %pISpcs\n", addr);
+ ret = -EINVAL;
+ goto out_destroy_id;
+ }
+
port->cm_id = cm_id;
return 0;
@@ -1766,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
+/*
+ * Report the MDTS value for @ctrl: NVMET_RDMA_MAX_METADATA_MDTS (5) when the
+ * controller has pi_support set, NVMET_RDMA_MAX_MDTS (8) otherwise.
+ */
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
+ if (ctrl->pi_support)
+ return NVMET_RDMA_MAX_METADATA_MDTS;
return NVMET_RDMA_MAX_MDTS;
}
@@ -1774,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.type = NVMF_TRTYPE_RDMA,
.msdbd = 1,
.has_keyed_sgls = 1,
+ .metadata_support = 1,
.add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response,
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f0da04e960f4..6f557db0320d 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}
+/*
+ * Common handling for a negative return from the send/receive paths.
+ *
+ * -EPIPE and -ECONNRESET only shut the socket down in both directions; any
+ * other status goes through nvmet_tcp_fatal_error().
+ */
+static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
+{
+ if (status == -EPIPE || status == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+}
+
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
@@ -510,7 +518,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
offset_in_page(cmd->data_pdu) + cmd->offset,
- left, MSG_DONTWAIT | MSG_MORE);
+ left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
return ret;
@@ -538,7 +546,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
left, flags);
@@ -585,7 +593,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -614,7 +622,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -644,6 +652,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if (!last_in_batch && cmd->queue->send_list_len)
msg.msg_flags |= MSG_MORE;
+ else
+ msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
@@ -716,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*sends)++;
}
-
+done:
return ret;
}
@@ -1157,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_recv_one(queue);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*recvs)++;
}
-
+done:
return ret;
}
@@ -1186,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w)
pending = false;
ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
- if (ret > 0) {
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
- if (ret > 0) {
- /* transmitted message/data */
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
index e645caa882dd..0458046d6501 100644
--- a/drivers/nvme/target/trace.h
+++ b/drivers/nvme/target/trace.h
@@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete,
);
+/* Build a { value, "NAME" } pair for __print_symbolic() from an enumerator */
+#define aer_name(aer) { aer, #aer }
+
+/*
+ * Trace point for an asynchronous event on a target controller.
+ *
+ * Records the controller's cntlid and the value
+ * (le32_to_cpu(result) & 0xff00) >> 8, which is printed both numerically and
+ * as the matching symbolic name from the list below.
+ */
+TRACE_EVENT(nvmet_async_event,
+ TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result),
+ TP_ARGS(ctrl, result),
+ TP_STRUCT__entry(
+ __field(int, ctrl_id)
+ __field(u32, result)
+ ),
+ TP_fast_assign(
+ __entry->ctrl_id = ctrl->cntlid;
+ __entry->result = (le32_to_cpu(result) & 0xff00) >> 8;
+ ),
+ TP_printk("nvmet%d: NVME_AEN=%#08x [%s]",
+ __entry->ctrl_id, __entry->result,
+ __print_symbolic(__entry->result,
+ aer_name(NVME_AER_NOTICE_NS_CHANGED),
+ aer_name(NVME_AER_NOTICE_ANA),
+ aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
+ aer_name(NVME_AER_NOTICE_DISC_CHANGED),
+ aer_name(NVME_AER_ERROR),
+ aer_name(NVME_AER_SMART),
+ aer_name(NVME_AER_CSS),
+ aer_name(NVME_AER_VS))
+ )
+);
+#undef aer_name
+
#endif /* _TRACE_NVMET_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 21bbccf0dc31..b46ba70f78da 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -895,7 +895,7 @@ __lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
"6213 NVMEx LS REQ Abort: Unable to locate req x%p\n",
pnvme_lsreq);
- return 1;
+ return -EINVAL;
}
static int
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 1c6bbbba70b5..32eb5e873e9b 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -3207,8 +3207,10 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
ctxp = cmdwqe->context2;
result = wcqe->parameter;
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- atomic_inc(&tgtp->xmt_ls_abort_cmpl);
+ if (phba->nvmet_support) {
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ atomic_inc(&tgtp->xmt_ls_abort_cmpl);
+ }
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n",
@@ -3244,7 +3246,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
- struct lpfc_nvmet_tgtport *tgtp;
+ struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
union lpfc_wqe128 *wqe_abts;
struct lpfc_nodelist *ndlp;
@@ -3253,13 +3255,15 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
"6067 ABTS: sid %x xri x%x/x%x\n",
sid, xri, ctxp->wqeq->sli4_xritag);
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ if (phba->nvmet_support && phba->targetport)
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
ndlp = lpfc_findnode_did(phba->pport, sid);
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
(ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
- atomic_inc(&tgtp->xmt_abort_rsp_error);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6134 Drop ABTS - wrong NDLP state x%x.\n",
(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
@@ -3538,7 +3542,7 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
- struct lpfc_nvmet_tgtport *tgtp;
+ struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
unsigned long flags;
int rc;
@@ -3555,7 +3559,9 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
ctxp->state = LPFC_NVME_STE_LS_ABORT;
}
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ if (phba->nvmet_support && phba->targetport)
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+
if (!ctxp->wqeq) {
/* Issue ABTS for this WQE based on iotag */
ctxp->wqeq = lpfc_sli_get_iocbq(phba);
@@ -3582,18 +3588,19 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq);
spin_unlock_irqrestore(&phba->hbalock, flags);
if (rc == WQE_SUCCESS) {
- atomic_inc(&tgtp->xmt_abort_unsol);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_unsol);
return 0;
}
out:
- atomic_inc(&tgtp->xmt_abort_rsp_error);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_rsp_error);
abts_wqeq->context2 = NULL;
abts_wqeq->context3 = NULL;
lpfc_sli_release_iocbq(phba, abts_wqeq);
- kfree(ctxp);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6056 Failed to Issue ABTS. Status x%x\n", rc);
- return 0;
+ return 1;
}
/**
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 1aaf40081e21..9e21c4f3b009 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -2813,7 +2813,7 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
struct lpfc_async_xchg_ctx *axchg = NULL;
char *failwhy = NULL;
uint32_t oxid, sid, did, fctl, size;
- int ret;
+ int ret = 1;
d_buf = piocb->context2;
@@ -2897,14 +2897,16 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
(phba->nvmet_support) ? "T" : "I", ret);
out_fail:
- kfree(axchg);
/* recycle receive buffer */
lpfc_in_buf_free(phba, &nvmebuf->dbuf);
/* If start of new exchange, abort it */
- if (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX))
- lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid);
+ if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX)))
+ ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid);
+
+ if (ret)
+ kfree(axchg);
}
/**
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index f9c226f9546a..2590bed6e6b3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -169,8 +169,6 @@ struct disk_part_tbl {
struct disk_events;
struct badblocks;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
struct blk_integrity {
const struct blk_integrity_profile *profile;
unsigned char flags;
@@ -179,8 +177,6 @@ struct blk_integrity {
unsigned char tag_size;
};
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b235a48eac8c..5ce51ab4c50e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -420,6 +420,12 @@ enum {
NVME_NS_DPS_PI_TYPE3 = 3,
};
+/* Identify Namespace Metadata Capabilities (MC): */
+enum {
+ NVME_MC_EXTENDED_LBA = (1 << 0),
+ NVME_MC_METADATA_PTR = (1 << 1),
+};
+
struct nvme_ns_id_desc {
__u8 nidt;
__u8 nidl;
@@ -1185,7 +1191,7 @@ struct nvmf_disc_rsp_page_hdr {
__le64 numrec;
__le16 recfmt;
__u8 resv14[1006];
- struct nvmf_disc_rsp_page_entry entries[0];
+ struct nvmf_disc_rsp_page_entry entries[];
};
enum {