Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--   drivers/nvme/host/pci.c   153
1 file changed, 73 insertions(+), 80 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a29b170701fc..51852085239e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -307,13 +307,12 @@ static void nvme_dbbuf_free(struct nvme_queue *nvmeq)
 static void nvme_dbbuf_set(struct nvme_dev *dev)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 	unsigned int i;
 
 	if (!dev->dbbuf_dbs)
 		return;
 
-	memset(&c, 0, sizeof(c));
 	c.dbbuf.opcode = nvme_admin_dbbuf;
 	c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
 	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
 
@@ -536,7 +535,7 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
 
 	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
 
-	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
+	if (!nvme_ctrl_sgl_supported(&dev->ctrl))
 		return false;
 	if (!iod->nvmeq->qid)
 		return false;
@@ -559,7 +558,6 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
 		dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
 		dma_addr = next_dma_addr;
 	}
-
 }
 
 static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
@@ -576,7 +574,6 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
 		dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
 		dma_addr = next_dma_addr;
 	}
-
 }
 
 static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
@@ -855,7 +852,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 						     &cmnd->rw, &bv);
 
 		if (iod->nvmeq->qid && sgl_threshold &&
-		    dev->ctrl.sgls & ((1 << 0) | (1 << 1)))
+		    nvme_ctrl_sgl_supported(&dev->ctrl))
 			return nvme_setup_sgl_simple(dev, req,
 						     &cmnd->rw, &bv);
 	}
@@ -1032,7 +1029,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 {
-	u16 tmp = nvmeq->cq_head + 1;
+	u32 tmp = nvmeq->cq_head + 1;
 
 	if (tmp == nvmeq->q_depth) {
 		nvmeq->cq_head = 0;
@@ -1114,9 +1111,8 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
 {
 	struct nvme_dev *dev = to_nvme_dev(ctrl);
 	struct nvme_queue *nvmeq = &dev->queues[0];
-	struct nvme_command c;
+	struct nvme_command c = { };
 
-	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
 	nvme_submit_cmd(nvmeq, &c, true);
@@ -1124,9 +1120,8 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 
-	memset(&c, 0, sizeof(c));
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
@@ -1136,7 +1131,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 		struct nvme_queue *nvmeq, s16 vector)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 	int flags = NVME_QUEUE_PHYS_CONTIG;
 
 	if (!test_bit(NVMEQ_POLLED, &nvmeq->flags))
@@ -1146,7 +1141,6 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	 * Note: we (ab)use the fact that the prp fields survive if no data
 	 * is attached to the request.
 	 */
-	memset(&c, 0, sizeof(c));
 	c.create_cq.opcode = nvme_admin_create_cq;
 	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
 	c.create_cq.cqid = cpu_to_le16(qid);
@@ -1161,7 +1155,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 		struct nvme_queue *nvmeq)
 {
 	struct nvme_ctrl *ctrl = &dev->ctrl;
-	struct nvme_command c;
+	struct nvme_command c = { };
 	int flags = NVME_QUEUE_PHYS_CONTIG;
 
 	/*
@@ -1176,7 +1170,6 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	 * Note: we (ab)use the fact that the prp fields survive if no data
 	 * is attached to the request.
 	 */
-	memset(&c, 0, sizeof(c));
 	c.create_sq.opcode = nvme_admin_create_sq;
 	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
 	c.create_sq.sqid = cpu_to_le16(qid);
@@ -1257,7 +1250,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_queue *nvmeq = iod->nvmeq;
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 	/* If PCI error recovery process is happening, we cannot reset or
@@ -1337,7 +1330,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	}
 	iod->aborted = 1;
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.abort.opcode = nvme_admin_abort_cmd;
 	cmd.abort.cid = req->tag;
 	cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
@@ -1562,6 +1554,28 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
 	wmb(); /* ensure the first interrupt sees the initialization */
 }
 
+/*
+ * Try getting shutdown_lock while setting up IO queues.
+ */
+static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
+{
+	/*
+	 * Give up if the lock is being held by nvme_dev_disable.
+	 */
+	if (!mutex_trylock(&dev->shutdown_lock))
+		return -ENODEV;
+
+	/*
+	 * Controller is in wrong state, fail early.
+	 */
+	if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
+		mutex_unlock(&dev->shutdown_lock);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 {
 	struct nvme_dev *dev = nvmeq->dev;
@@ -1590,8 +1604,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 		goto release_cq;
 
 	nvmeq->cq_vector = vector;
-	nvme_init_queue(nvmeq, qid);
 
+	result = nvme_setup_io_queues_trylock(dev);
+	if (result)
+		return result;
+	nvme_init_queue(nvmeq, qid);
 	if (!polled) {
 		result = queue_request_irq(nvmeq);
 		if (result < 0)
@@ -1599,10 +1616,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	}
 
 	set_bit(NVMEQ_ENABLED, &nvmeq->flags);
+	mutex_unlock(&dev->shutdown_lock);
 	return result;
 
 release_sq:
 	dev->online_queues--;
+	mutex_unlock(&dev->shutdown_lock);
 	adapter_delete_sq(dev, qid);
 release_cq:
 	adapter_delete_cq(dev, qid);
@@ -1888,10 +1907,9 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 {
 	u32 host_mem_size = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT;
 	u64 dma_addr = dev->host_mem_descs_dma;
-	struct nvme_command c;
+	struct nvme_command c = { };
 	int ret;
 
-	memset(&c, 0, sizeof(c));
 	c.features.opcode = nvme_admin_set_features;
 	c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
 	c.features.dword11 = cpu_to_le32(bits);
@@ -2176,7 +2194,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (nr_io_queues == 0)
 		return 0;
 
-	clear_bit(NVMEQ_ENABLED, &adminq->flags);
+	/*
+	 * Free IRQ resources as soon as NVMEQ_ENABLED bit transitions
+	 * from set to unset. If there is a window before it is truly
+	 * freed, pci_free_irq_vectors() jumping into this window will
+	 * crash. Also take the lock to avoid racing with
+	 * pci_free_irq_vectors() in the nvme_dev_disable() path.
+	 */
+	result = nvme_setup_io_queues_trylock(dev);
+	if (result)
+		return result;
+	if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
+		pci_free_irq(pdev, 0, adminq);
 
 	if (dev->cmb_use_sqes) {
 		result = nvme_cmb_qdepth(dev, nr_io_queues,
@@ -2192,14 +2221,17 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 		result = nvme_remap_bar(dev, size);
 		if (!result)
 			break;
-		if (!--nr_io_queues)
-			return -ENOMEM;
+		if (!--nr_io_queues) {
+			result = -ENOMEM;
+			goto out_unlock;
+		}
 	} while (1);
 	adminq->q_db = dev->dbs;
 
  retry:
 	/* Deregister the admin queue's interrupt */
-	pci_free_irq(pdev, 0, adminq);
+	if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
+		pci_free_irq(pdev, 0, adminq);
 
 	/*
 	 * If we enable msix early due to not intx, disable it again before
@@ -2208,8 +2240,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	pci_free_irq_vectors(pdev);
 
 	result = nvme_setup_irqs(dev, nr_io_queues);
-	if (result <= 0)
-		return -EIO;
+	if (result <= 0) {
+		result = -EIO;
+		goto out_unlock;
+	}
 
 	dev->num_vecs = result;
 	result = max(result - 1, 1);
@@ -2223,8 +2257,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	 */
 	result = queue_request_irq(adminq);
 	if (result)
-		return result;
+		goto out_unlock;
 	set_bit(NVMEQ_ENABLED, &adminq->flags);
+	mutex_unlock(&dev->shutdown_lock);
 
 	result = nvme_create_io_queues(dev);
 	if (result || dev->online_queues < 2)
@@ -2233,6 +2268,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (dev->online_queues - 1 < dev->max_qid) {
 		nr_io_queues = dev->online_queues - 1;
 		nvme_disable_io_queues(dev);
+		result = nvme_setup_io_queues_trylock(dev);
+		if (result)
+			return result;
 		nvme_suspend_io_queues(dev);
 		goto retry;
 	}
@@ -2241,6 +2279,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 			dev->io_queues[HCTX_TYPE_READ],
 			dev->io_queues[HCTX_TYPE_POLL]);
 	return 0;
+out_unlock:
+	mutex_unlock(&dev->shutdown_lock);
+	return result;
 }
 
 static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2265,9 +2306,8 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 {
 	struct request_queue *q = nvmeq->dev->ctrl.admin_q;
 	struct request *req;
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.delete_queue.opcode = opcode;
 	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
 
@@ -2591,7 +2631,9 @@ static void nvme_reset_work(struct work_struct *work)
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result;
 
-	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
+	if (dev->ctrl.state != NVME_CTRL_RESETTING) {
+		dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
+			 dev->ctrl.state);
 		result = -ENODEV;
 		goto out;
 	}
@@ -2828,54 +2870,6 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_ACPI
-static bool nvme_acpi_storage_d3(struct pci_dev *dev)
-{
-	struct acpi_device *adev;
-	struct pci_dev *root;
-	acpi_handle handle;
-	acpi_status status;
-	u8 val;
-
-	/*
-	 * Look for _DSD property specifying that the storage device on the port
-	 * must use D3 to support deep platform power savings during
-	 * suspend-to-idle.
-	 */
-	root = pcie_find_root_port(dev);
-	if (!root)
-		return false;
-
-	adev = ACPI_COMPANION(&root->dev);
-	if (!adev)
-		return false;
-
-	/*
-	 * The property is defined in the PXSX device for South complex ports
-	 * and in the PEGP device for North complex ports.
-	 */
-	status = acpi_get_handle(adev->handle, "PXSX", &handle);
-	if (ACPI_FAILURE(status)) {
-		status = acpi_get_handle(adev->handle, "PEGP", &handle);
-		if (ACPI_FAILURE(status))
-			return false;
-	}
-
-	if (acpi_bus_get_device(handle, &adev))
-		return false;
-
-	if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable",
-			&val))
-		return false;
-	return val == 1;
-}
-#else
-static inline bool nvme_acpi_storage_d3(struct pci_dev *dev)
-{
-	return false;
-}
-#endif /* CONFIG_ACPI */
-
 static void nvme_async_probe(void *data, async_cookie_t cookie)
 {
 	struct nvme_dev *dev = data;
@@ -2925,7 +2919,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	quirks |= check_vendor_combination_bug(pdev);
 
-	if (!noacpi && nvme_acpi_storage_d3(pdev)) {
+	if (!noacpi && acpi_storage_d3(&pdev->dev)) {
		/*
 		 * Some systems use a bios work around to ask for D3 on
 		 * platforms that support kernel managed suspend.
@@ -3020,7 +3014,6 @@ static void nvme_remove(struct pci_dev *pdev)
 	if (!pci_device_is_present(pdev)) {
 		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
 		nvme_dev_disable(dev, true);
-		nvme_dev_remove_admin(dev);
 	}
 
 	flush_work(&dev->ctrl.reset_work);
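A few of the changes above carry general idioms worth spelling out. The memset-to-initializer conversions depend on C's rule that a braced initializer zero-fills every member not explicitly named; '= { }' (an empty initializer list) is a GNU C extension the kernel builds with, while standard C spells it '= { 0 }'. A minimal sketch of the equivalence, using a hypothetical cmd_stub in place of struct nvme_command:

#include <assert.h>
#include <string.h>

/* Hypothetical stand-in for struct nvme_command; illustration only. */
struct cmd_stub {
	unsigned char opcode;
	unsigned short qid;
	unsigned long long prp1;
};

int main(void)
{
	struct cmd_stub a;
	struct cmd_stub b = { 0 };	/* the initializer form the patch adopts */

	memset(&a, 0, sizeof(a));	/* the form the patch removes */

	/* Every named member is zero either way. */
	assert(a.opcode == b.opcode && a.qid == b.qid && a.prp1 == b.prp1);
	return 0;
}

One caveat the conversion assumes away: memset() also zeroes padding bytes, which an initializer is not required to do, so the swap is only a drop-in replacement when no padding byte can reach the device, as with the fully specified 64-byte NVMe command layout.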
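The nvme_ctrl_sgl_supported() conversions swap an open-coded test of the controller's SGLS (SGL Support) field for a named predicate, so the Identify-data bit layout lives in one place. A sketch of that refactor under hypothetical names (the real helper lives in the nvme core):

#include <stdbool.h>
#include <stdint.h>

struct fake_ctrl {
	uint32_t sgls;	/* SGL Support field from Identify Controller */
};

/* Named predicate replacing the magic mask (1 << 0) | (1 << 1). */
static bool ctrl_sgl_supported(const struct fake_ctrl *ctrl)
{
	return ctrl->sgls & ((1u << 0) | (1u << 1));
}

int main(void)
{
	struct fake_ctrl c = { .sgls = 1u << 0 };

	return ctrl_sgl_supported(&c) ? 0 : 1;	/* exits 0: SGLs supported */
}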
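The u16-to-u32 change in nvme_update_cq_head() looks cosmetic but is not: NVMe allows queues of up to 65536 entries, one more than a u16 can hold, so a 16-bit temporary computed as cq_head + 1 can wrap to 0 and never compare equal to q_depth. A standalone illustration of the arithmetic (assuming a queue at the spec maximum; exact driver limits aside):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t q_depth = 65536;	/* spec maximum number of queue entries */
	uint16_t cq_head = 65535;	/* head sitting on the last entry */

	uint16_t tmp16 = cq_head + 1;		/* truncates to 0 */
	uint32_t tmp32 = (uint32_t)cq_head + 1;	/* 65536 */

	printf("u16 wrap test fires: %d\n", tmp16 == q_depth);	/* 0: missed */
	printf("u32 wrap test fires: %d\n", tmp32 == q_depth);	/* 1: caught */
	return 0;
}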
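nvme_setup_io_queues_trylock() is the centerpiece of the race fix: queue setup must not block on shutdown_lock while nvme_dev_disable() holds it to tear everything down, so setup tries the lock and then re-validates the controller state before touching IRQs or doorbells, and every early exit taken after a successful trylock is paired with an unlock (the new out_unlock label). A userspace sketch of the same pattern, with pthreads and hypothetical names standing in for the kernel APIs:

#include <errno.h>
#include <pthread.h>

enum ctrl_state { CTRL_CONNECTING, CTRL_DELETING };

struct ctrl {
	pthread_mutex_t shutdown_lock;
	enum ctrl_state state;
};

static int setup_io_queues_trylock(struct ctrl *c)
{
	/* Give up rather than block if a teardown path holds the lock. */
	if (pthread_mutex_trylock(&c->shutdown_lock) != 0)
		return -ENODEV;

	/* Teardown may have changed the state before we got the lock. */
	if (c->state != CTRL_CONNECTING) {
		pthread_mutex_unlock(&c->shutdown_lock);
		return -ENODEV;
	}

	return 0;	/* success: caller does its work, then unlocks */
}

int main(void)
{
	struct ctrl c = {
		.shutdown_lock = PTHREAD_MUTEX_INITIALIZER,
		.state = CTRL_CONNECTING,
	};

	if (setup_io_queues_trylock(&c) == 0) {
		/* ... set up queues here ... */
		pthread_mutex_unlock(&c.shutdown_lock);
	}
	return 0;
}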
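Finally, the clear_bit() and unconditional pci_free_irq() sites become test_and_clear_bit() so that, between the retry loop and a concurrent disable path, only the caller that observes NVMEQ_ENABLED go from set to clear releases the admin interrupt, and a second attempt is a harmless no-op. A sketch of the free-exactly-once idiom using C11 atomics in place of the kernel bitops (names are hypothetical):

#include <stdatomic.h>
#include <stdio.h>

static atomic_bool admin_irq_enabled = true;

/* Only the caller that flips the flag from set to clear does the release. */
static void free_admin_irq_once(void)
{
	/* atomic_exchange returns the old value, like test_and_clear_bit() */
	if (atomic_exchange(&admin_irq_enabled, false))
		puts("freeing admin IRQ");
	else
		puts("already freed; nothing to do");
}

int main(void)
{
	free_admin_irq_once();	/* performs the release */
	free_admin_irq_once();	/* safe no-op on the retry path */
	return 0;
}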