aboutsummaryrefslogtreecommitdiff
path: root/drivers/nvme/host/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--drivers/nvme/host/pci.c1162
1 files changed, 772 insertions, 390 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3faefabf339c..8569ee771269 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -17,37 +17,24 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/idr.h>
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/poison.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/t10-pi.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h>
+#include <linux/sed-opal.h>
#include "nvme.h"
-#define NVME_Q_DEPTH 1024
-#define NVME_AQ_DEPTH 256
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
@@ -64,12 +51,24 @@ static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
-static struct workqueue_struct *nvme_workq;
+static unsigned int max_host_mem_size_mb = 128;
+module_param(max_host_mem_size_mb, uint, 0444);
+MODULE_PARM_DESC(max_host_mem_size_mb,
+ "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
+
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+ .set = io_queue_depth_set,
+ .get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
struct nvme_dev;
struct nvme_queue;
-static int nvme_reset(struct nvme_dev *dev);
static void nvme_process_cq(struct nvme_queue *nvmeq);
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
@@ -84,15 +83,13 @@ struct nvme_dev {
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
- unsigned queue_count;
unsigned online_queues;
unsigned max_qid;
int q_depth;
u32 db_stride;
void __iomem *bar;
- struct work_struct reset_work;
+ unsigned long bar_mapped_size;
struct work_struct remove_work;
- struct timer_list watchdog_timer;
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
@@ -102,8 +99,41 @@ struct nvme_dev {
u32 cmbloc;
struct nvme_ctrl ctrl;
struct completion ioq_wait;
+
+ /* shadow doorbell buffer support: */
+ u32 *dbbuf_dbs;
+ dma_addr_t dbbuf_dbs_dma_addr;
+ u32 *dbbuf_eis;
+ dma_addr_t dbbuf_eis_dma_addr;
+
+ /* host memory buffer support: */
+ u64 host_mem_size;
+ u32 nr_host_mem_descs;
+ struct nvme_host_mem_buf_desc *host_mem_descs;
+ void **host_mem_desc_bufs;
};
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < 2)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
+static inline unsigned int sq_idx(unsigned int qid, u32 stride)
+{
+ return qid * 2 * stride;
+}
+
+static inline unsigned int cq_idx(unsigned int qid, u32 stride)
+{
+ return (qid * 2 + 1) * stride;
+}
+
static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
{
return container_of(ctrl, struct nvme_dev, ctrl);
@@ -116,7 +146,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
struct nvme_queue {
struct device *q_dmadev;
struct nvme_dev *dev;
- char irqname[24]; /* nvme4294967295-65535\0 */
spinlock_t q_lock;
struct nvme_command *sq_cmds;
struct nvme_command __iomem *sq_cmds_io;
@@ -132,6 +161,10 @@ struct nvme_queue {
u16 qid;
u8 cq_phase;
u8 cqe_seen;
+ u32 *dbbuf_sq_db;
+ u32 *dbbuf_cq_db;
+ u32 *dbbuf_sq_ei;
+ u32 *dbbuf_cq_ei;
};
/*
@@ -166,10 +199,116 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
- BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+ BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+}
+
+static inline unsigned int nvme_dbbuf_size(u32 stride)
+{
+ return ((num_possible_cpus() + 1) * 8 * stride);
+}
+
+static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
+{
+ unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+
+ if (dev->dbbuf_dbs)
+ return 0;
+
+ dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
+ &dev->dbbuf_dbs_dma_addr,
+ GFP_KERNEL);
+ if (!dev->dbbuf_dbs)
+ return -ENOMEM;
+ dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
+ &dev->dbbuf_eis_dma_addr,
+ GFP_KERNEL);
+ if (!dev->dbbuf_eis) {
+ dma_free_coherent(dev->dev, mem_size,
+ dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
+ dev->dbbuf_dbs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
+{
+ unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+
+ if (dev->dbbuf_dbs) {
+ dma_free_coherent(dev->dev, mem_size,
+ dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
+ dev->dbbuf_dbs = NULL;
+ }
+ if (dev->dbbuf_eis) {
+ dma_free_coherent(dev->dev, mem_size,
+ dev->dbbuf_eis, dev->dbbuf_eis_dma_addr);
+ dev->dbbuf_eis = NULL;
+ }
+}
+
+static void nvme_dbbuf_init(struct nvme_dev *dev,
+ struct nvme_queue *nvmeq, int qid)
+{
+ if (!dev->dbbuf_dbs || !qid)
+ return;
+
+ nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)];
+ nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)];
+ nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)];
+ nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
+}
+
+static void nvme_dbbuf_set(struct nvme_dev *dev)
+{
+ struct nvme_command c;
+
+ if (!dev->dbbuf_dbs)
+ return;
+
+ memset(&c, 0, sizeof(c));
+ c.dbbuf.opcode = nvme_admin_dbbuf;
+ c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
+ c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
+
+ if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
+ dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
+ /* Free memory and continue on */
+ nvme_dbbuf_dma_free(dev);
+ }
+}
+
+static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
+{
+ return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
+}
+
+/* Update dbbuf and return true if an MMIO is required */
+static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
+ volatile u32 *dbbuf_ei)
+{
+ if (dbbuf_db) {
+ u16 old_value;
+
+ /*
+ * Ensure that the queue is written before updating
+ * the doorbell in memory
+ */
+ wmb();
+
+ old_value = *dbbuf_db;
+ *dbbuf_db = value;
+
+ if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
+ return false;
+ }
+
+ return true;
}
/*
@@ -203,11 +342,6 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev)
nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
}
-static int nvmeq_irq(struct nvme_queue *nvmeq)
-{
- return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector);
-}
-
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -230,19 +364,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
nvmeq->tags = NULL;
}
-static int nvme_admin_init_request(void *data, struct request *req,
- unsigned int hctx_idx, unsigned int rq_idx,
- unsigned int numa_node)
-{
- struct nvme_dev *dev = data;
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[0];
-
- BUG_ON(!nvmeq);
- iod->nvmeq = nvmeq;
- return 0;
-}
-
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -257,13 +378,13 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
return 0;
}
-static int nvme_init_request(void *data, struct request *req,
- unsigned int hctx_idx, unsigned int rq_idx,
- unsigned int numa_node)
+static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx, unsigned int numa_node)
{
- struct nvme_dev *dev = data;
+ struct nvme_dev *dev = set->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+ int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+ struct nvme_queue *nvmeq = dev->queues[queue_idx];
BUG_ON(!nvmeq);
iod->nvmeq = nvmeq;
@@ -296,7 +417,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
if (++tail == nvmeq->q_depth)
tail = 0;
- writel(tail, nvmeq->q_db);
+ if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db,
+ nvmeq->dbbuf_sq_ei))
+ writel(tail, nvmeq->q_db);
nvmeq->sq_tail = tail;
}
@@ -306,7 +429,7 @@ static __le64 **iod_list(struct request *req)
return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
}
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
int nseg = blk_rq_nr_phys_segments(rq);
@@ -315,7 +438,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
if (!iod->sg)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
} else {
iod->sg = iod->inline_sg;
}
@@ -325,11 +448,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
iod->nents = 0;
iod->length = size;
- if (!(rq->rq_flags & RQF_DONTPREP)) {
- rq->retries = 0;
- rq->rq_flags |= RQF_DONTPREP;
- }
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -420,7 +539,7 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
}
#endif
-static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
+static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct dma_pool *pool;
@@ -437,7 +556,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
length -= (page_size - offset);
if (length <= 0)
- return true;
+ return BLK_STS_OK;
dma_len -= (page_size - offset);
if (dma_len) {
@@ -450,7 +569,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
if (length <= page_size) {
iod->first_dma = dma_addr;
- return true;
+ return BLK_STS_OK;
}
nprps = DIV_ROUND_UP(length, page_size);
@@ -466,7 +585,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
if (!prp_list) {
iod->first_dma = dma_addr;
iod->npages = -1;
- return false;
+ return BLK_STS_RESOURCE;
}
list[0] = prp_list;
iod->first_dma = prp_dma;
@@ -476,7 +595,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
__le64 *old_prp_list = prp_list;
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
- return false;
+ return BLK_STS_RESOURCE;
list[iod->npages++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
@@ -490,38 +609,55 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
break;
if (dma_len > 0)
continue;
- BUG_ON(dma_len < 0);
+ if (unlikely(dma_len < 0))
+ goto bad_sgl;
sg = sg_next(sg);
dma_addr = sg_dma_address(sg);
dma_len = sg_dma_len(sg);
}
- return true;
+ return BLK_STS_OK;
+
+ bad_sgl:
+ if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
+ blk_rq_payload_bytes(req), iod->nents)) {
+ for_each_sg(iod->sg, sg, iod->nents, i) {
+ dma_addr_t phys = sg_phys(sg);
+ pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+ "dma_address:%pad dma_length:%d\n", i, &phys,
+ sg->offset, sg->length,
+ &sg_dma_address(sg),
+ sg_dma_len(sg));
+ }
+ }
+ return BLK_STS_IOERR;
+
}
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct request_queue *q = req->q;
enum dma_data_direction dma_dir = rq_data_dir(req) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE;
- int ret = BLK_MQ_RQ_QUEUE_ERROR;
+ blk_status_t ret = BLK_STS_IOERR;
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(q, req, iod->sg);
if (!iod->nents)
goto out;
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
DMA_ATTR_NO_WARN))
goto out;
- if (!nvme_setup_prps(dev, req))
+ ret = nvme_setup_prps(dev, req);
+ if (ret != BLK_STS_OK)
goto out_unmap;
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
if (blk_integrity_rq(req)) {
if (blk_rq_count_integrity_sg(q, req->bio) != 1)
goto out_unmap;
@@ -541,7 +677,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
if (blk_integrity_rq(req))
cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_unmap:
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -571,7 +707,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
/*
* NOTE: ns is NULL when called on the admin queue.
*/
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
@@ -579,50 +715,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_command cmnd;
- int ret = BLK_MQ_RQ_QUEUE_OK;
-
- /*
- * If formated with metadata, require the block layer provide a buffer
- * unless this namespace is formated such that the metadata can be
- * stripped/generated by the controller with PRACT=1.
- */
- if (ns && ns->ms && !blk_integrity_rq(req)) {
- if (!(ns->pi_type && ns->ms == 8) &&
- req->cmd_type != REQ_TYPE_DRV_PRIV) {
- blk_mq_end_request(req, -EFAULT);
- return BLK_MQ_RQ_QUEUE_OK;
- }
- }
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, req, &cmnd);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
ret = nvme_init_iod(req, dev);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
goto out_free_cmd;
- if (blk_rq_nr_phys_segments(req))
+ if (blk_rq_nr_phys_segments(req)) {
ret = nvme_map_data(dev, req, &cmnd);
-
- if (ret != BLK_MQ_RQ_QUEUE_OK)
- goto out_cleanup_iod;
+ if (ret)
+ goto out_cleanup_iod;
+ }
blk_mq_start_request(req);
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
- if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
- ret = BLK_MQ_RQ_QUEUE_BUSY;
- else
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod;
}
__nvme_submit_cmd(nvmeq, &cmnd);
nvme_process_cq(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_cleanup_iod:
nvme_free_iod(dev, req);
out_free_cmd:
@@ -630,34 +750,12 @@ out_free_cmd:
return ret;
}
-static void nvme_complete_rq(struct request *req)
+static void nvme_pci_complete_rq(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_dev *dev = iod->nvmeq->dev;
- int error = 0;
- nvme_unmap_data(dev, req);
-
- if (unlikely(req->errors)) {
- if (nvme_req_needs_retry(req, req->errors)) {
- req->retries++;
- nvme_requeue_req(req);
- return;
- }
-
- if (req->cmd_type == REQ_TYPE_DRV_PRIV)
- error = req->errors;
- else
- error = nvme_error_status(req->errors);
- }
-
- if (unlikely(iod->aborted)) {
- dev_warn(dev->ctrl.device,
- "completing aborted command with status: %04x\n",
- req->errors);
- }
-
- blk_mq_end_request(req, error);
+ nvme_unmap_data(iod->nvmeq->dev, req);
+ nvme_complete_rq(req);
}
/* We read the CQE phase first to check if the rest of the entry is valid */
@@ -667,64 +765,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
}
-static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
- u16 head, phase;
-
- head = nvmeq->cq_head;
- phase = nvmeq->cq_phase;
+ u16 head = nvmeq->cq_head;
- while (nvme_cqe_valid(nvmeq, head, phase)) {
- struct nvme_completion cqe = nvmeq->cqes[head];
- struct request *req;
-
- if (++head == nvmeq->q_depth) {
- head = 0;
- phase = !phase;
- }
-
- if (tag && *tag == cqe.command_id)
- *tag = -1;
-
- if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
- dev_warn(nvmeq->dev->ctrl.device,
- "invalid id %d completed on queue %d\n",
- cqe.command_id, le16_to_cpu(cqe.sq_id));
- continue;
- }
+ if (likely(nvmeq->cq_vector >= 0)) {
+ if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+ nvmeq->dbbuf_cq_ei))
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+ }
+}
- /*
- * AEN requests are special as they don't time out and can
- * survive any kind of queue freeze and often don't respond to
- * aborts. We don't even bother to allocate a struct request
- * for them but rather special case them here.
- */
- if (unlikely(nvmeq->qid == 0 &&
- cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
- nvme_complete_async_event(&nvmeq->dev->ctrl,
- cqe.status, &cqe.result);
- continue;
- }
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ struct request *req;
- req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
- nvme_req(req)->result = cqe.result;
- blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
+ if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->dev->ctrl.device,
+ "invalid id %d completed on queue %d\n",
+ cqe->command_id, le16_to_cpu(cqe->sq_id));
+ return;
}
- if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
+ /*
+ * AEN requests are special as they don't time out and can
+ * survive any kind of queue freeze and often don't respond to
+ * aborts. We don't even bother to allocate a struct request
+ * for them but rather special case them here.
+ */
+ if (unlikely(nvmeq->qid == 0 &&
+ cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+ nvme_complete_async_event(&nvmeq->dev->ctrl,
+ cqe->status, &cqe->result);
return;
+ }
- if (likely(nvmeq->cq_vector >= 0))
- writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
- nvmeq->cq_head = head;
- nvmeq->cq_phase = phase;
+ req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+ nvme_end_request(req, cqe->status, cqe->result);
+}
- nvmeq->cqe_seen = 1;
+static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
+ *cqe = nvmeq->cqes[nvmeq->cq_head];
+
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
+ }
+ return true;
+ }
+ return false;
}
static void nvme_process_cq(struct nvme_queue *nvmeq)
{
- __nvme_process_cq(nvmeq, NULL);
+ struct nvme_completion cqe;
+ int consumed = 0;
+
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
+ }
+
+ if (consumed) {
+ nvme_ring_cq_doorbell(nvmeq);
+ nvmeq->cqe_seen = 1;
+ }
}
static irqreturn_t nvme_irq(int irq, void *data)
@@ -747,20 +856,37 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
return IRQ_NONE;
}
-static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
- struct nvme_queue *nvmeq = hctx->driver_data;
+ struct nvme_completion cqe;
+ int found = 0, consumed = 0;
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
- spin_lock_irq(&nvmeq->q_lock);
- __nvme_process_cq(nvmeq, &tag);
- spin_unlock_irq(&nvmeq->q_lock);
+ if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ return 0;
- if (tag == -1)
- return 1;
- }
+ spin_lock_irq(&nvmeq->q_lock);
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
- return 0;
+ if (tag == cqe.command_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (consumed)
+ nvme_ring_cq_doorbell(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
+
+ return found;
+}
+
+static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+{
+ struct nvme_queue *nvmeq = hctx->driver_data;
+
+ return __nvme_poll(nvmeq, tag);
}
static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
@@ -814,7 +940,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
struct nvme_queue *nvmeq)
{
struct nvme_command c;
- int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+ int flags = NVME_QUEUE_PHYS_CONTIG;
/*
* Note: we (ab)use the fact the the prp fields survive if no data
@@ -841,17 +967,62 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
- u16 status = req->errors;
- dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
+ dev_warn(nvmeq->dev->ctrl.device,
+ "Abort status: 0x%x", nvme_req(req)->status);
atomic_inc(&nvmeq->dev->ctrl.abort_limit);
blk_mq_free_request(req);
}
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+ /* If true, indicates loss of adapter communication, possibly by a
+ * NVMe Subsystem reset.
+ */
+ bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+ /* If there is a reset ongoing, we shouldn't reset again. */
+ if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ return false;
+
+ /* We shouldn't reset unless the controller is on fatal error state
+ * _or_ if we lost the communication with it.
+ */
+ if (!(csts & NVME_CSTS_CFS) && !nssro)
+ return false;
+
+ /* If PCI error recovery process is happening, we cannot reset or
+ * the recovery mechanism will surely fail.
+ */
+ if (pci_channel_offline(to_pci_dev(dev->dev)))
+ return false;
+
+ return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+ /* Read a config register to help see what died. */
+ u16 pci_status;
+ int result;
+
+ result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+ &pci_status);
+ if (result == PCIBIOS_SUCCESSFUL)
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+ csts, pci_status);
+ else
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+ csts, result);
+}
+
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -859,6 +1030,27 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
struct nvme_command cmd;
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ /*
+ * Reset immediately if the controller is failed
+ */
+ if (nvme_should_reset(dev, csts)) {
+ nvme_warn_reset(dev, csts);
+ nvme_dev_disable(dev, false);
+ nvme_reset_ctrl(&dev->ctrl);
+ return BLK_EH_HANDLED;
+ }
+
+ /*
+ * Did we miss an interrupt?
+ */
+ if (__nvme_poll(nvmeq, req->tag)) {
+ dev_warn(dev->ctrl.device,
+ "I/O %d QID %d timeout, completion polled\n",
+ req->tag, nvmeq->qid);
+ return BLK_EH_HANDLED;
+ }
/*
* Shutdown immediately if controller times out while starting. The
@@ -871,7 +1063,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
- req->errors = NVME_SC_CANCELLED;
+ nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
}
@@ -885,22 +1077,21 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
/*
* Mark the request as handled, since the inline shutdown
* forces all outstanding requests to complete.
*/
- req->errors = NVME_SC_CANCELLED;
+ nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
}
- iod->aborted = 1;
-
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
atomic_inc(&dev->ctrl.abort_limit);
return BLK_EH_RESET_TIMER;
}
+ iod->aborted = 1;
memset(&cmd, 0, sizeof(cmd));
cmd.abort.opcode = nvme_admin_abort_cmd;
@@ -944,9 +1135,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
int i;
- for (i = dev->queue_count - 1; i >= lowest; i--) {
+ for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
- dev->queue_count--;
+ dev->ctrl.queue_count--;
dev->queues[i] = NULL;
nvme_free_queue(nvmeq);
}
@@ -965,15 +1156,15 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
spin_unlock_irq(&nvmeq->q_lock);
return 1;
}
- vector = nvmeq_irq(nvmeq);
+ vector = nvmeq->cq_vector;
nvmeq->dev->online_queues--;
nvmeq->cq_vector = -1;
spin_unlock_irq(&nvmeq->q_lock);
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
- blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+ blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
- free_irq(vector, nvmeq);
+ pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
return 0;
}
@@ -990,8 +1181,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
- nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
- dev->bar + NVME_REG_CAP));
+ nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
@@ -1041,9 +1231,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
- int depth)
+ int depth, int node)
{
- struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+ struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+ node);
if (!nvmeq)
return NULL;
@@ -1057,8 +1248,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
- snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
- dev->ctrl.instance, qid);
spin_lock_init(&nvmeq->q_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
@@ -1067,7 +1256,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->qid = qid;
nvmeq->cq_vector = -1;
dev->queues[qid] = nvmeq;
- dev->queue_count++;
+ dev->ctrl.queue_count++;
return nvmeq;
@@ -1081,12 +1270,16 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
static int queue_request_irq(struct nvme_queue *nvmeq)
{
- if (use_threaded_interrupts)
- return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
- nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq);
- else
- return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
- nvmeq->irqname, nvmeq);
+ struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
+ int nr = nvmeq->dev->ctrl.instance;
+
+ if (use_threaded_interrupts) {
+ return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq_check,
+ nvme_irq, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
+ } else {
+ return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq,
+ NULL, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
+ }
}
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
@@ -1099,6 +1292,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+ nvme_dbbuf_init(dev, nvmeq, qid);
dev->online_queues++;
spin_unlock_irq(&nvmeq->q_lock);
}
@@ -1131,18 +1325,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
}
-static struct blk_mq_ops nvme_mq_admin_ops = {
+static const struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
- .complete = nvme_complete_rq,
+ .complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
- .init_request = nvme_admin_init_request,
+ .init_request = nvme_init_request,
.timeout = nvme_timeout,
};
-static struct blk_mq_ops nvme_mq_ops = {
+static const struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
- .complete = nvme_complete_rq,
+ .complete = nvme_pci_complete_rq,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
.map_queues = nvme_pci_map_queues,
@@ -1158,7 +1352,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
* user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion.
*/
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
@@ -1178,6 +1372,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
+ dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
dev->admin_tagset.driver_data = dev;
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
@@ -1194,32 +1389,62 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return -ENODEV;
}
} else
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
return 0;
}
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+ return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (size <= dev->bar_mapped_size)
+ return 0;
+ if (size > pci_resource_len(pdev, 0))
+ return -ENOMEM;
+ if (dev->bar)
+ iounmap(dev->bar);
+ dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+ if (!dev->bar) {
+ dev->bar_mapped_size = 0;
+ return -ENOMEM;
+ }
+ dev->bar_mapped_size = size;
+ dev->dbs = dev->bar + NVME_REG_DBS;
+
+ return 0;
+}
+
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
{
int result;
u32 aqa;
- u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
+ result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+ if (result < 0)
+ return result;
+
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
- NVME_CAP_NSSRC(cap) : 0;
+ NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
if (dev->subsystem &&
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(&dev->ctrl, cap);
+ result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result < 0)
return result;
nvmeq = dev->queues[0];
if (!nvmeq) {
- nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+ nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+ dev_to_node(dev->dev));
if (!nvmeq)
return -ENOMEM;
}
@@ -1231,7 +1456,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(&dev->ctrl, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result)
return result;
@@ -1245,79 +1470,21 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
}
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
- /* If true, indicates loss of adapter communication, possibly by a
- * NVMe Subsystem reset.
- */
- bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
- /* If there is a reset ongoing, we shouldn't reset again. */
- if (work_busy(&dev->reset_work))
- return false;
-
- /* We shouldn't reset unless the controller is on fatal error state
- * _or_ if we lost the communication with it.
- */
- if (!(csts & NVME_CSTS_CFS) && !nssro)
- return false;
-
- /* If PCI error recovery process is happening, we cannot reset or
- * the recovery mechanism will surely fail.
- */
- if (pci_channel_offline(to_pci_dev(dev->dev)))
- return false;
-
- return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
- /* Read a config register to help see what died. */
- u16 pci_status;
- int result;
-
- result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
- &pci_status);
- if (result == PCIBIOS_SUCCESSFUL)
- dev_warn(dev->dev,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
- csts, pci_status);
- else
- dev_warn(dev->dev,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
- csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
- struct nvme_dev *dev = (struct nvme_dev *)data;
- u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
- /* Skip controllers under certain specific conditions. */
- if (nvme_should_reset(dev, csts)) {
- if (!nvme_reset(dev))
- nvme_warn_reset(dev, csts);
- return;
- }
-
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i, max;
int ret = 0;
- for (i = dev->queue_count; i <= dev->max_qid; i++) {
- if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+ for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
+ /* vector == qid - 1, match nvme_create_queue */
+ if (!nvme_alloc_queue(dev, i, dev->q_depth,
+ pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
ret = -ENOMEM;
break;
}
}
- max = min(dev->max_qid, dev->queue_count - 1);
+ max = min(dev->max_qid, dev->ctrl.queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
ret = nvme_create_queue(dev->queues[i], i);
if (ret)
@@ -1391,21 +1558,179 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
if (dev->cmb) {
iounmap(dev->cmb);
dev->cmb = NULL;
+ if (dev->cmbsz) {
+ sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
+ &dev_attr_cmb.attr, NULL);
+ dev->cmbsz = 0;
+ }
+ }
+}
+
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+{
+ size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+ struct nvme_command c;
+ u64 dma_addr;
+ int ret;
+
+ dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev->dev, dma_addr))
+ return -ENOMEM;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_set_features;
+ c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+ c.features.dword11 = cpu_to_le32(bits);
+ c.features.dword12 = cpu_to_le32(dev->host_mem_size >>
+ ilog2(dev->ctrl.page_size));
+ c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr));
+ c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
+ c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
+
+ ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
+ if (ret) {
+ dev_warn(dev->ctrl.device,
+ "failed to set host mem (err %d, flags %#x).\n",
+ ret, bits);
+ }
+ dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
+ return ret;
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->nr_host_mem_descs; i++) {
+ struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
+ size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
+ le64_to_cpu(desc->addr));
+ }
+
+ kfree(dev->host_mem_desc_bufs);
+ dev->host_mem_desc_bufs = NULL;
+ kfree(dev->host_mem_descs);
+ dev->host_mem_descs = NULL;
+}
+
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
+{
+ struct nvme_host_mem_buf_desc *descs;
+ u32 chunk_size, max_entries;
+ int i = 0;
+ void **bufs;
+ u64 size = 0, tmp;
+
+ /* start big and work our way down */
+ chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
+retry:
+ tmp = (preferred + chunk_size - 1);
+ do_div(tmp, chunk_size);
+ max_entries = tmp;
+ descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+ if (!descs)
+ goto out;
+
+ bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
+ if (!bufs)
+ goto out_free_descs;
+
+ for (size = 0; size < preferred; size += chunk_size) {
+ u32 len = min_t(u64, chunk_size, preferred - size);
+ dma_addr_t dma_addr;
+
+ bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
+ if (!bufs[i])
+ break;
+
+ descs[i].addr = cpu_to_le64(dma_addr);
+ descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+ i++;
+ }
+
+ if (!size || (min && size < min)) {
+ dev_warn(dev->ctrl.device,
+ "failed to allocate host memory buffer.\n");
+ goto out_free_bufs;
+ }
+
+ dev_info(dev->ctrl.device,
+ "allocated %lld MiB host memory buffer.\n",
+ size >> ilog2(SZ_1M));
+ dev->nr_host_mem_descs = i;
+ dev->host_mem_size = size;
+ dev->host_mem_descs = descs;
+ dev->host_mem_desc_bufs = bufs;
+ return 0;
+
+out_free_bufs:
+ while (--i >= 0) {
+ size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, bufs[i],
+ le64_to_cpu(descs[i].addr));
}
+
+ kfree(bufs);
+out_free_descs:
+ kfree(descs);
+out:
+ /* try a smaller chunk size if we failed early */
+ if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
+ chunk_size /= 2;
+ goto retry;
+ }
+ dev->host_mem_descs = NULL;
+ return -ENOMEM;
}
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+static void nvme_setup_host_mem(struct nvme_dev *dev)
{
- return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
+ u64 max = (u64)max_host_mem_size_mb * SZ_1M;
+ u64 preferred = (u64)dev->ctrl.hmpre * 4096;
+ u64 min = (u64)dev->ctrl.hmmin * 4096;
+ u32 enable_bits = NVME_HOST_MEM_ENABLE;
+
+ preferred = min(preferred, max);
+ if (min > max) {
+ dev_warn(dev->ctrl.device,
+ "min host memory (%lld MiB) above limit (%d MiB).\n",
+ min >> ilog2(SZ_1M), max_host_mem_size_mb);
+ nvme_free_host_mem(dev);
+ return;
+ }
+
+ /*
+ * If we already have a buffer allocated check if we can reuse it.
+ */
+ if (dev->host_mem_descs) {
+ if (dev->host_mem_size >= min)
+ enable_bits |= NVME_HOST_MEM_RETURN;
+ else
+ nvme_free_host_mem(dev);
+ }
+
+ if (!dev->host_mem_descs) {
+ if (nvme_alloc_host_mem(dev, min, preferred))
+ return;
+ }
+
+ if (nvme_set_host_mem(dev, enable_bits))
+ nvme_free_host_mem(dev);
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, nr_io_queues, size;
+ int result, nr_io_queues;
+ unsigned long size;
- nr_io_queues = num_online_cpus();
+ nr_io_queues = num_present_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
return result;
@@ -1422,23 +1747,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nvme_release_cmb(dev);
}
- size = db_bar_size(dev, nr_io_queues);
- if (size > 8192) {
- iounmap(dev->bar);
- do {
- dev->bar = ioremap(pci_resource_start(pdev, 0), size);
- if (dev->bar)
- break;
- if (!--nr_io_queues)
- return -ENOMEM;
- size = db_bar_size(dev, nr_io_queues);
- } while (1);
- dev->dbs = dev->bar + 4096;
- adminq->q_db = dev->dbs;
- }
+ do {
+ size = db_bar_size(dev, nr_io_queues);
+ result = nvme_remap_bar(dev, size);
+ if (!result)
+ break;
+ if (!--nr_io_queues)
+ return -ENOMEM;
+ } while (1);
+ adminq->q_db = dev->dbs;
/* Deregister the admin queue's interrupt */
- free_irq(pci_irq_vector(pdev, 0), adminq);
+ pci_free_irq(pdev, 0, adminq);
/*
* If we enable msix early due to not intx, disable it again before
@@ -1466,7 +1786,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return nvme_create_io_queues(dev);
}
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
@@ -1474,7 +1794,7 @@ static void nvme_del_queue_end(struct request *req, int error)
complete(&nvmeq->dev->ioq_wait);
}
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
@@ -1567,6 +1887,8 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (blk_mq_alloc_tag_set(&dev->tagset))
return 0;
dev->ctrl.tagset = &dev->tagset;
+
+ nvme_dbbuf_set(dev);
} else {
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
@@ -1579,7 +1901,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
static int nvme_pci_enable(struct nvme_dev *dev)
{
- u64 cap;
int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -1606,10 +1927,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
if (result < 0)
return result;
- cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+ dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
- dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
- dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+ dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+ io_queue_depth);
+ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
dev->dbs = dev->bar + 4096;
/*
@@ -1618,9 +1940,15 @@ static int nvme_pci_enable(struct nvme_dev *dev)
*/
if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
dev->q_depth = 2;
- dev_warn(dev->dev, "detected Apple NVMe controller, set "
- "queue depth=%u to work around controller resets\n",
+ dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
+ "set queue depth=%u to work around controller resets\n",
dev->q_depth);
+ } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+ (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+ NVME_CAP_MQES(dev->ctrl.cap) == 0) {
+ dev->q_depth = 64;
+ dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+ "set queue depth=%u\n", dev->q_depth);
}
/*
@@ -1637,7 +1965,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
if (dev->cmbsz) {
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL))
- dev_warn(dev->dev,
+ dev_warn(dev->ctrl.device,
"failed to add sysfs attribute for CMB\n");
}
}
@@ -1662,6 +1990,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
+ nvme_release_cmb(dev);
pci_free_irq_vectors(pdev);
if (pci_is_enabled(pdev)) {
@@ -1673,26 +2002,50 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i, queues;
- u32 csts = -1;
-
- del_timer_sync(&dev->watchdog_timer);
+ bool dead = true;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
mutex_lock(&dev->shutdown_lock);
- if (pci_is_enabled(to_pci_dev(dev->dev))) {
- nvme_stop_queues(&dev->ctrl);
- csts = readl(dev->bar + NVME_REG_CSTS);
+ if (pci_is_enabled(pdev)) {
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ if (dev->ctrl.state == NVME_CTRL_LIVE ||
+ dev->ctrl.state == NVME_CTRL_RESETTING)
+ nvme_start_freeze(&dev->ctrl);
+ dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+ pdev->error_state != pci_channel_io_normal);
}
+ /*
+ * Give the controller a chance to complete all entered requests if
+ * doing a safe shutdown.
+ */
+ if (!dead) {
+ if (shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+
+ /*
+ * If the controller is still alive tell it to stop using the
+ * host memory buffer. In theory the shutdown / reset should
+ * make sure that it doesn't access the host memoery anymore,
+ * but I'd rather be safe than sorry..
+ */
+ if (dev->host_mem_descs)
+ nvme_set_host_mem(dev, 0);
+
+ }
+ nvme_stop_queues(&dev->ctrl);
+
queues = dev->online_queues - 1;
- for (i = dev->queue_count - 1; i > 0; i--)
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
- if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+ if (dead) {
/* A device might become IO incapable very soon during
* probe, before the admin queue is configured. Thus,
* queue_count can be 0 here.
*/
- if (dev->queue_count)
+ if (dev->ctrl.queue_count)
nvme_suspend_queue(dev->queues[0]);
} else {
nvme_disable_io_queues(dev, queues);
@@ -1702,6 +2055,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+ /*
+ * The driver will not be starting up queues again if shutting down so
+ * must flush all entered requests to their failed completion to avoid
+ * deadlocking blk-mq hot-cpu notifier.
+ */
+ if (shutdown)
+ nvme_start_queues(&dev->ctrl);
mutex_unlock(&dev->shutdown_lock);
}
@@ -1732,12 +2093,14 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
+ nvme_dbbuf_dma_free(dev);
put_device(dev->dev);
if (dev->tagset.tags)
blk_mq_free_tag_set(&dev->tagset);
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
+ free_opal_dev(dev->ctrl.opal_dev);
kfree(dev);
}
@@ -1753,10 +2116,12 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
static void nvme_reset_work(struct work_struct *work)
{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+ struct nvme_dev *dev =
+ container_of(work, struct nvme_dev, ctrl.reset_work);
+ bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;
- if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
+ if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
goto out;
/*
@@ -1766,14 +2131,11 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
- goto out;
-
result = nvme_pci_enable(dev);
if (result)
goto out;
- result = nvme_configure_admin_queue(dev);
+ result = nvme_pci_configure_admin_queue(dev);
if (result)
goto out;
@@ -1786,22 +2148,32 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
+ if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
+ if (!dev->ctrl.opal_dev)
+ dev->ctrl.opal_dev =
+ init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+ else if (was_suspend)
+ opal_unlock_from_suspend(dev->ctrl.opal_dev);
+ } else {
+ free_opal_dev(dev->ctrl.opal_dev);
+ dev->ctrl.opal_dev = NULL;
+ }
+
+ if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
+ result = nvme_dbbuf_dma_alloc(dev);
+ if (result)
+ dev_warn(dev->dev,
+ "unable to allocate dma for dbbuf\n");
+ }
+
+ if (dev->ctrl.hmpre)
+ nvme_setup_host_mem(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
/*
- * A controller that can not execute IO typically requires user
- * intervention to correct. For such degraded controllers, the driver
- * should not submit commands the user did not request, so skip
- * registering for asynchronous event notification on this condition.
- */
- if (dev->online_queues > 1)
- nvme_queue_async_events(&dev->ctrl);
-
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
- /*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
*/
@@ -1811,7 +2183,9 @@ static void nvme_reset_work(struct work_struct *work)
nvme_remove_namespaces(&dev->ctrl);
} else {
nvme_start_queues(&dev->ctrl);
+ nvme_wait_freeze(&dev->ctrl);
nvme_dev_add(dev);
+ nvme_unfreeze(&dev->ctrl);
}
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
@@ -1819,8 +2193,7 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
- if (dev->online_queues > 1)
- nvme_queue_scan(&dev->ctrl);
+ nvme_start_ctrl(&dev->ctrl);
return;
out:
@@ -1838,17 +2211,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
nvme_put_ctrl(&dev->ctrl);
}
-static int nvme_reset(struct nvme_dev *dev)
-{
- if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
- return -ENODEV;
- if (work_busy(&dev->reset_work))
- return -ENODEV;
- if (!queue_work(nvme_workq, &dev->reset_work))
- return -EBUSY;
- return 0;
-}
-
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
*val = readl(to_nvme_dev(ctrl)->bar + off);
@@ -1867,23 +2229,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
return 0;
}
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
-{
- struct nvme_dev *dev = to_nvme_dev(ctrl);
- int ret = nvme_reset(dev);
-
- if (!ret)
- flush_work(&dev->reset_work);
- return ret;
-}
-
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
+ .flags = NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
- .reset_ctrl = nvme_pci_reset_ctrl,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
};
@@ -1895,8 +2247,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (pci_request_mem_regions(pdev, "nvme"))
return -ENODEV;
- dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
- if (!dev->bar)
+ if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
goto release;
return 0;
@@ -1905,10 +2256,31 @@ static int nvme_dev_map(struct nvme_dev *dev)
return -ENODEV;
}
+static unsigned long check_dell_samsung_bug(struct pci_dev *pdev)
+{
+ if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
+ /*
+ * Several Samsung devices seem to drop off the PCIe bus
+ * randomly when APST is on and uses the deepest sleep state.
+ * This has been observed on a Samsung "SM951 NVMe SAMSUNG
+ * 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD
+ * 950 PRO 256GB", but it seems to be restricted to two Dell
+ * laptops.
+ */
+ if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") &&
+ (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
+ dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
+ return NVME_QUIRK_NO_DEEPEST_PS;
+ }
+
+ return 0;
+}
+
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
struct nvme_dev *dev;
+ unsigned long quirks = id->driver_data;
node = dev_to_node(&pdev->dev);
if (node == NUMA_NO_NODE)
@@ -1927,48 +2299,52 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
result = nvme_dev_map(dev);
if (result)
- goto free;
+ goto put_pci;
- INIT_WORK(&dev->reset_work, nvme_reset_work);
+ INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
- setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
- (unsigned long)dev);
mutex_init(&dev->shutdown_lock);
init_completion(&dev->ioq_wait);
result = nvme_setup_prp_pools(dev);
if (result)
- goto put_pci;
+ goto unmap;
+
+ quirks |= check_dell_samsung_bug(pdev);
result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
- id->driver_data);
+ quirks);
if (result)
goto release_pools;
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- queue_work(nvme_workq, &dev->reset_work);
+ queue_work(nvme_wq, &dev->ctrl.reset_work);
return 0;
release_pools:
nvme_release_prp_pools(dev);
+ unmap:
+ nvme_dev_unmap(dev);
put_pci:
put_device(dev->dev);
- nvme_dev_unmap(dev);
free:
kfree(dev->queues);
kfree(dev);
return result;
}
-static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
+static void nvme_reset_prepare(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
+ nvme_dev_disable(dev, false);
+}
- if (prepare)
- nvme_dev_disable(dev, false);
- else
- nvme_reset(dev);
+static void nvme_reset_done(struct pci_dev *pdev)
+{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+ nvme_reset_ctrl(&dev->ctrl);
}
static void nvme_shutdown(struct pci_dev *pdev)
@@ -1988,17 +2364,22 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+ cancel_work_sync(&dev->ctrl.reset_work);
pci_set_drvdata(pdev, NULL);
- if (!pci_device_is_present(pdev))
+ if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+ nvme_dev_disable(dev, false);
+ }
- flush_work(&dev->reset_work);
- nvme_uninit_ctrl(&dev->ctrl);
+ flush_work(&dev->ctrl.reset_work);
+ nvme_stop_ctrl(&dev->ctrl);
+ nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
+ nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
- nvme_release_cmb(dev);
+ nvme_uninit_ctrl(&dev->ctrl);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl);
@@ -2037,7 +2418,7 @@ static int nvme_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_reset(ndev);
+ nvme_reset_ctrl(&ndev->ctrl);
return 0;
}
#endif
@@ -2076,7 +2457,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
@@ -2089,27 +2470,38 @@ static const struct pci_error_handlers nvme_err_handler = {
.error_detected = nvme_error_detected,
.slot_reset = nvme_slot_reset,
.resume = nvme_error_resume,
- .reset_notify = nvme_reset_notify,
+ .reset_prepare = nvme_reset_prepare,
+ .reset_done = nvme_reset_done,
};
static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0x0953),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DISCARD_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a53),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DISCARD_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a54),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DISCARD_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
+ { PCI_VDEVICE(INTEL, 0x0a55),
+ .driver_data = NVME_QUIRK_STRIPE_SIZE |
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
+ { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);
@@ -2129,22 +2521,12 @@ static struct pci_driver nvme_driver = {
static int __init nvme_init(void)
{
- int result;
-
- nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
- if (!nvme_workq)
- return -ENOMEM;
-
- result = pci_register_driver(&nvme_driver);
- if (result)
- destroy_workqueue(nvme_workq);
- return result;
+ return pci_register_driver(&nvme_driver);
}
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- destroy_workqueue(nvme_workq);
_nvme_check_size();
}