From 99e6b87ec2102b10e190c92ea9560bafcb744f86 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 26 Aug 2016 14:08:53 +0800 Subject: mtip32xx: mark symbols static where possible We get 1 warning when biuld kernel with W=1: drivers/block/mtip32xx/mtip32xx.c:3689:6: warning: no previous prototype for 'mtip_block_release' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks it 'static'. Signed-off-by: Baoyou Xie Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 2aca98e8e427..88c46853dbb5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3686,7 +3686,7 @@ static int mtip_block_open(struct block_device *dev, fmode_t mode) return -ENODEV; } -void mtip_block_release(struct gendisk *disk, fmode_t mode) +static void mtip_block_release(struct gendisk *disk, fmode_t mode) { } -- cgit From fd8383fd88a2fd842a9431df5ed353bd7129eecc Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Sep 2016 12:33:37 -0700 Subject: nbd: convert to blkmq This moves NBD over to using blkmq, which allows us to get rid of the NBD wide queue lock and the async submit kthread. We will start with 1 hw queue for now, but I plan to add multiple tcp connection support in the future and we'll fix how we set the hwqueue's. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 337 ++++++++++++++++++++-------------------------------- 1 file changed, 129 insertions(+), 208 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a9e398019f38..15e7c6740873 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -45,12 +46,8 @@ struct nbd_device { struct socket * sock; /* If == NULL, device is not ready, yet */ int magic; - spinlock_t queue_lock; - struct list_head queue_head; /* Requests waiting result */ - struct request *active_req; - wait_queue_head_t active_wq; - struct list_head waiting_queue; /* Requests to be sent */ - wait_queue_head_t waiting_wq; + atomic_t outstanding_cmds; + struct blk_mq_tag_set tag_set; struct mutex tx_lock; struct gendisk *disk; @@ -71,6 +68,11 @@ struct nbd_device { #endif }; +struct nbd_cmd { + struct nbd_device *nbd; + struct list_head list; +}; + #if IS_ENABLED(CONFIG_DEBUG_FS) static struct dentry *nbd_dbg_dir; #endif @@ -83,18 +85,6 @@ static unsigned int nbds_max = 16; static struct nbd_device *nbd_dev; static int max_part; -/* - * Use just one lock (or at most 1 per NIC). Two arguments for this: - * 1. Each NIC is essentially a synchronization point for all servers - * accessed through that NIC so there's no need to have more locks - * than NICs anyway. - * 2. More locks lead to more "Dirty cache line bouncing" which will slow - * down each lock to the point where they're actually slower than just - * a single lock. - * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this! - */ -static DEFINE_SPINLOCK(nbd_lock); - static inline struct device *nbd_to_dev(struct nbd_device *nbd) { return disk_to_dev(nbd->disk); @@ -153,18 +143,17 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, return 0; } -static void nbd_end_request(struct nbd_device *nbd, struct request *req) +static void nbd_end_request(struct nbd_cmd *cmd) { + struct nbd_device *nbd = cmd->nbd; + struct request *req = blk_mq_rq_from_pdu(cmd); int error = req->errors ? -EIO : 0; - struct request_queue *q = req->q; - unsigned long flags; - dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", req, + dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd, error ? "failed" : "done"); - spin_lock_irqsave(q->queue_lock, flags); - __blk_end_request_all(req, error); - spin_unlock_irqrestore(q->queue_lock, flags); + atomic_dec(&nbd->outstanding_cmds); + blk_mq_complete_request(req, error); } /* @@ -193,7 +182,7 @@ static void nbd_xmit_timeout(unsigned long arg) struct nbd_device *nbd = (struct nbd_device *)arg; unsigned long flags; - if (list_empty(&nbd->queue_head)) + if (!atomic_read(&nbd->outstanding_cmds)) return; spin_lock_irqsave(&nbd->sock_lock, flags); @@ -273,8 +262,9 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec, } /* always call with the tx_lock held */ -static int nbd_send_req(struct nbd_device *nbd, struct request *req) +static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) { + struct request *req = blk_mq_rq_from_pdu(cmd); int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); @@ -298,10 +288,10 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.len = htonl(size); } - memcpy(request.handle, &req, sizeof(req)); + memcpy(request.handle, &req->tag, sizeof(req->tag)); dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", - req, nbdcmd_to_ascii(type), + cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, 1, &request, sizeof(request), (type == NBD_CMD_WRITE) ? MSG_MORE : 0); @@ -323,7 +313,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) if (!rq_iter_last(bvec, iter)) flags = MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", - req, bvec.bv_len); + cmd, bvec.bv_len); result = sock_send_bvec(nbd, &bvec, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), @@ -336,29 +326,6 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) return 0; } -static struct request *nbd_find_request(struct nbd_device *nbd, - struct request *xreq) -{ - struct request *req, *tmp; - int err; - - err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq); - if (unlikely(err)) - return ERR_PTR(err); - - spin_lock(&nbd->queue_lock); - list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) { - if (req != xreq) - continue; - list_del_init(&req->queuelist); - spin_unlock(&nbd->queue_lock); - return req; - } - spin_unlock(&nbd->queue_lock); - - return ERR_PTR(-ENOENT); -} - static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) { int result; @@ -370,11 +337,14 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) } /* NULL returned = something went wrong, inform userspace */ -static struct request *nbd_read_stat(struct nbd_device *nbd) +static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) { int result; struct nbd_reply reply; - struct request *req; + struct nbd_cmd *cmd; + struct request *req = NULL; + u16 hwq; + int tag; reply.magic = 0; result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL); @@ -390,25 +360,27 @@ static struct request *nbd_read_stat(struct nbd_device *nbd) return ERR_PTR(-EPROTO); } - req = nbd_find_request(nbd, *(struct request **)reply.handle); - if (IS_ERR(req)) { - result = PTR_ERR(req); - if (result != -ENOENT) - return ERR_PTR(result); + memcpy(&tag, reply.handle, sizeof(int)); - dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n", - reply.handle); - return ERR_PTR(-EBADR); + hwq = blk_mq_unique_tag_to_hwq(tag); + if (hwq < nbd->tag_set.nr_hw_queues) + req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], + blk_mq_unique_tag_to_tag(tag)); + if (!req || !blk_mq_request_started(req)) { + dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n", + tag, req); + return ERR_PTR(-ENOENT); } + cmd = blk_mq_rq_to_pdu(req); if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); req->errors++; - return req; + return cmd; } - dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); + dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd); if (rq_data_dir(req) != WRITE) { struct req_iterator iter; struct bio_vec bvec; @@ -419,13 +391,13 @@ static struct request *nbd_read_stat(struct nbd_device *nbd) dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); req->errors++; - return req; + return cmd; } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", - req, bvec.bv_len); + cmd, bvec.bv_len); } } - return req; + return cmd; } static ssize_t pid_show(struct device *dev, @@ -444,7 +416,7 @@ static struct device_attribute pid_attr = { static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) { - struct request *req; + struct nbd_cmd *cmd; int ret; BUG_ON(nbd->magic != NBD_MAGIC); @@ -460,13 +432,13 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) nbd_size_update(nbd, bdev); while (1) { - req = nbd_read_stat(nbd); - if (IS_ERR(req)) { - ret = PTR_ERR(req); + cmd = nbd_read_stat(nbd); + if (IS_ERR(cmd)) { + ret = PTR_ERR(cmd); break; } - nbd_end_request(nbd, req); + nbd_end_request(cmd); } nbd_size_clear(nbd, bdev); @@ -475,44 +447,37 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) return ret; } -static void nbd_clear_que(struct nbd_device *nbd) +static void nbd_clear_req(struct request *req, void *data, bool reserved) { - struct request *req; + struct nbd_cmd *cmd; + if (!blk_mq_request_started(req)) + return; + cmd = blk_mq_rq_to_pdu(req); + req->errors++; + nbd_end_request(cmd); +} + +static void nbd_clear_que(struct nbd_device *nbd) +{ BUG_ON(nbd->magic != NBD_MAGIC); /* * Because we have set nbd->sock to NULL under the tx_lock, all - * modifications to the list must have completed by now. For - * the same reason, the active_req must be NULL. - * - * As a consequence, we don't need to take the spin lock while - * purging the list here. + * modifications to the list must have completed by now. */ BUG_ON(nbd->sock); - BUG_ON(nbd->active_req); - while (!list_empty(&nbd->queue_head)) { - req = list_entry(nbd->queue_head.next, struct request, - queuelist); - list_del_init(&req->queuelist); - req->errors++; - nbd_end_request(nbd, req); - } - - while (!list_empty(&nbd->waiting_queue)) { - req = list_entry(nbd->waiting_queue.next, struct request, - queuelist); - list_del_init(&req->queuelist); - req->errors++; - nbd_end_request(nbd, req); - } + blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); } -static void nbd_handle_req(struct nbd_device *nbd, struct request *req) +static void nbd_handle_cmd(struct nbd_cmd *cmd) { + struct request *req = blk_mq_rq_from_pdu(cmd); + struct nbd_device *nbd = cmd->nbd; + if (req->cmd_type != REQ_TYPE_FS) goto error_out; @@ -526,6 +491,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) req->errors = 0; mutex_lock(&nbd->tx_lock); + nbd->task_send = current; if (unlikely(!nbd->sock)) { mutex_unlock(&nbd->tx_lock); dev_err(disk_to_dev(nbd->disk), @@ -533,106 +499,34 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) goto error_out; } - nbd->active_req = req; - - if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head)) + if (nbd->xmit_timeout && !atomic_read(&nbd->outstanding_cmds)) mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout); - if (nbd_send_req(nbd, req) != 0) { + atomic_inc(&nbd->outstanding_cmds); + if (nbd_send_cmd(nbd, cmd) != 0) { dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); req->errors++; - nbd_end_request(nbd, req); - } else { - spin_lock(&nbd->queue_lock); - list_add_tail(&req->queuelist, &nbd->queue_head); - spin_unlock(&nbd->queue_lock); + nbd_end_request(cmd); } - nbd->active_req = NULL; + nbd->task_send = NULL; mutex_unlock(&nbd->tx_lock); - wake_up_all(&nbd->active_wq); return; error_out: req->errors++; - nbd_end_request(nbd, req); -} - -static int nbd_thread_send(void *data) -{ - struct nbd_device *nbd = data; - struct request *req; - - nbd->task_send = current; - - set_user_nice(current, MIN_NICE); - while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { - /* wait for something to do */ - wait_event_interruptible(nbd->waiting_wq, - kthread_should_stop() || - !list_empty(&nbd->waiting_queue)); - - /* extract request */ - if (list_empty(&nbd->waiting_queue)) - continue; - - spin_lock_irq(&nbd->queue_lock); - req = list_entry(nbd->waiting_queue.next, struct request, - queuelist); - list_del_init(&req->queuelist); - spin_unlock_irq(&nbd->queue_lock); - - /* handle request */ - nbd_handle_req(nbd, req); - } - - nbd->task_send = NULL; - - return 0; + nbd_end_request(cmd); } -/* - * We always wait for result of write, for now. It would be nice to make it optional - * in future - * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK)) - * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } - */ - -static void nbd_request_handler(struct request_queue *q) - __releases(q->queue_lock) __acquires(q->queue_lock) +static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; - - while ((req = blk_fetch_request(q)) != NULL) { - struct nbd_device *nbd; - - spin_unlock_irq(q->queue_lock); - - nbd = req->rq_disk->private_data; - - BUG_ON(nbd->magic != NBD_MAGIC); + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); - dev_dbg(nbd_to_dev(nbd), "request %p: dequeued (flags=%x)\n", - req, req->cmd_type); - - if (unlikely(!nbd->sock)) { - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); - req->errors++; - nbd_end_request(nbd, req); - spin_lock_irq(q->queue_lock); - continue; - } - - spin_lock_irq(&nbd->queue_lock); - list_add_tail(&req->queuelist, &nbd->waiting_queue); - spin_unlock_irq(&nbd->queue_lock); - - wake_up(&nbd->waiting_wq); - - spin_lock_irq(q->queue_lock); - } + blk_mq_start_request(bd->rq); + nbd_handle_cmd(cmd); + return BLK_MQ_RQ_QUEUE_OK; } static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock) @@ -700,33 +594,37 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, { switch (cmd) { case NBD_DISCONNECT: { - struct request sreq; + struct request *sreq; dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); if (!nbd->sock) return -EINVAL; + sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0); + if (!sreq) + return -ENOMEM; + mutex_unlock(&nbd->tx_lock); fsync_bdev(bdev); mutex_lock(&nbd->tx_lock); - blk_rq_init(NULL, &sreq); - sreq.cmd_type = REQ_TYPE_DRV_PRIV; + sreq->cmd_type = REQ_TYPE_DRV_PRIV; /* Check again after getting mutex back. */ - if (!nbd->sock) + if (!nbd->sock) { + blk_mq_free_request(sreq); return -EINVAL; + } nbd->disconnect = true; - nbd_send_req(nbd, &sreq); + nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq)); + blk_mq_free_request(sreq); return 0; } case NBD_CLEAR_SOCK: sock_shutdown(nbd); nbd_clear_que(nbd); - BUG_ON(!list_empty(&nbd->queue_head)); - BUG_ON(!list_empty(&nbd->waiting_queue)); kill_bdev(bdev); return 0; @@ -772,7 +670,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return 0; case NBD_DO_IT: { - struct task_struct *thread; int error; if (nbd->task_recv) @@ -786,18 +683,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_parse_flags(nbd, bdev); - thread = kthread_run(nbd_thread_send, nbd, "%s", - nbd_name(nbd)); - if (IS_ERR(thread)) { - mutex_lock(&nbd->tx_lock); - nbd->task_recv = NULL; - return PTR_ERR(thread); - } - nbd_dev_dbg_init(nbd); error = nbd_thread_recv(nbd, bdev); nbd_dev_dbg_close(nbd); - kthread_stop(thread); mutex_lock(&nbd->tx_lock); nbd->task_recv = NULL; @@ -825,10 +713,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return 0; case NBD_PRINT_DEBUG: - dev_info(disk_to_dev(nbd->disk), - "next = %p, prev = %p, head = %p\n", - nbd->queue_head.next, nbd->queue_head.prev, - &nbd->queue_head); + /* + * For compatibility only, we no longer keep a list of + * outstanding requests. + */ return 0; } return -ENOTTY; @@ -987,6 +875,23 @@ static void nbd_dbg_close(void) #endif +static int nbd_init_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int request_idx, + unsigned int numa_node) +{ + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); + + cmd->nbd = data; + INIT_LIST_HEAD(&cmd->list); + return 0; +} + +static struct blk_mq_ops nbd_mq_ops = { + .queue_rq = nbd_queue_rq, + .map_queue = blk_mq_map_queue, + .init_request = nbd_init_request, +}; + /* * And here should be modules and kernel interface * (Just smiley confuses emacs :-) @@ -1035,16 +940,34 @@ static int __init nbd_init(void) if (!disk) goto out; nbd_dev[i].disk = disk; + + nbd_dev[i].tag_set.ops = &nbd_mq_ops; + nbd_dev[i].tag_set.nr_hw_queues = 1; + nbd_dev[i].tag_set.queue_depth = 128; + nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE; + nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd); + nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE | + BLK_MQ_F_SG_MERGE; + nbd_dev[i].tag_set.driver_data = &nbd_dev[i]; + + err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set); + if (err) { + put_disk(disk); + goto out; + } + /* * The new linux 2.5 block layer implementation requires * every gendisk to have its very own request_queue struct. * These structs are big so we dynamically allocate them. */ - disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock); + disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set); if (!disk->queue) { + blk_mq_free_tag_set(&nbd_dev[i].tag_set); put_disk(disk); goto out; } + /* * Tell the block layer that we are not a rotational device */ @@ -1069,16 +992,12 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; nbd_dev[i].magic = NBD_MAGIC; - INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); - spin_lock_init(&nbd_dev[i].queue_lock); spin_lock_init(&nbd_dev[i].sock_lock); - INIT_LIST_HEAD(&nbd_dev[i].queue_head); mutex_init(&nbd_dev[i].tx_lock); init_timer(&nbd_dev[i].timeout_timer); nbd_dev[i].timeout_timer.function = nbd_xmit_timeout; nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i]; - init_waitqueue_head(&nbd_dev[i].active_wq); - init_waitqueue_head(&nbd_dev[i].waiting_wq); + atomic_set(&nbd_dev[i].outstanding_cmds, 0); disk->major = NBD_MAJOR; disk->first_minor = i << part_shift; disk->fops = &nbd_fops; @@ -1091,6 +1010,7 @@ static int __init nbd_init(void) return 0; out: while (i--) { + blk_mq_free_tag_set(&nbd_dev[i].tag_set); blk_cleanup_queue(nbd_dev[i].disk->queue); put_disk(nbd_dev[i].disk); } @@ -1110,6 +1030,7 @@ static void __exit nbd_cleanup(void) if (disk) { del_gendisk(disk); blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&nbd_dev[i].tag_set); put_disk(disk); } } -- cgit From c261189862c6f65117eb3b1748622a08ef49c262 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Sep 2016 12:33:38 -0700 Subject: nbd: don't shutdown sock with irq's disabled We hit a warning when shutting down the nbd connection because we have irq's disabled. We don't really need to do the shutdown under the lock, just clear the nbd->sock. So do the shutdown outside of the irq. This gets rid of the warning. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 15e7c6740873..4b7d0f3c3521 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -161,6 +161,8 @@ static void nbd_end_request(struct nbd_cmd *cmd) */ static void sock_shutdown(struct nbd_device *nbd) { + struct socket *sock; + spin_lock_irq(&nbd->sock_lock); if (!nbd->sock) { @@ -168,18 +170,21 @@ static void sock_shutdown(struct nbd_device *nbd) return; } + sock = nbd->sock; dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); - kernel_sock_shutdown(nbd->sock, SHUT_RDWR); - sockfd_put(nbd->sock); nbd->sock = NULL; spin_unlock_irq(&nbd->sock_lock); + kernel_sock_shutdown(sock, SHUT_RDWR); + sockfd_put(sock); + del_timer(&nbd->timeout_timer); } static void nbd_xmit_timeout(unsigned long arg) { struct nbd_device *nbd = (struct nbd_device *)arg; + struct socket *sock = NULL; unsigned long flags; if (!atomic_read(&nbd->outstanding_cmds)) @@ -189,10 +194,16 @@ static void nbd_xmit_timeout(unsigned long arg) nbd->timedout = true; - if (nbd->sock) - kernel_sock_shutdown(nbd->sock, SHUT_RDWR); + if (nbd->sock) { + sock = nbd->sock; + get_file(sock->file); + } spin_unlock_irqrestore(&nbd->sock_lock, flags); + if (sock) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sockfd_put(sock); + } dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); } -- cgit From 9b4a6ba9185ac1c398f2db69c491989452ce7018 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Sep 2016 12:33:39 -0700 Subject: nbd: use flags instead of bool In preparation for some future changes, change a few of the state bools over to normal bits to set/clear properly. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4b7d0f3c3521..cf855a1b3729 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -41,8 +41,12 @@ #include +#define NBD_TIMEDOUT 0 +#define NBD_DISCONNECT_REQUESTED 1 + struct nbd_device { u32 flags; + unsigned long runtime_flags; struct socket * sock; /* If == NULL, device is not ready, yet */ int magic; @@ -54,8 +58,6 @@ struct nbd_device { int blksize; loff_t bytesize; int xmit_timeout; - bool timedout; - bool disconnect; /* a disconnect has been requested by user */ struct timer_list timeout_timer; /* protects initialization and shutdown of the socket */ @@ -192,7 +194,7 @@ static void nbd_xmit_timeout(unsigned long arg) spin_lock_irqsave(&nbd->sock_lock, flags); - nbd->timedout = true; + set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); if (nbd->sock) { sock = nbd->sock; @@ -562,8 +564,7 @@ out: /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { - nbd->disconnect = false; - nbd->timedout = false; + nbd->runtime_flags = 0; nbd->blksize = 1024; nbd->bytesize = 0; set_capacity(nbd->disk, 0); @@ -626,7 +627,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return -EINVAL; } - nbd->disconnect = true; + set_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags); nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq)); blk_mq_free_request(sreq); @@ -706,9 +707,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, kill_bdev(bdev); nbd_bdev_reset(bdev); - if (nbd->disconnect) /* user requested, ignore socket errors */ + /* user requested, ignore socket errors */ + if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) error = 0; - if (nbd->timedout) + if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) error = -ETIMEDOUT; nbd_reset(nbd); -- cgit From 0eadf37afc2500e1162c9040ec26a705b9af8d47 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Sep 2016 12:33:40 -0700 Subject: nbd: allow block mq to deal with timeouts Instead of rolling our own timer, just utilize the blk mq req timeout and do the disconnect if any of our commands timeout. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 51 ++++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index cf855a1b3729..4c6dd1a85ead 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -50,16 +50,13 @@ struct nbd_device { struct socket * sock; /* If == NULL, device is not ready, yet */ int magic; - atomic_t outstanding_cmds; struct blk_mq_tag_set tag_set; struct mutex tx_lock; struct gendisk *disk; int blksize; loff_t bytesize; - int xmit_timeout; - struct timer_list timeout_timer; /* protects initialization and shutdown of the socket */ spinlock_t sock_lock; struct task_struct *task_recv; @@ -154,7 +151,6 @@ static void nbd_end_request(struct nbd_cmd *cmd) dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd, error ? "failed" : "done"); - atomic_dec(&nbd->outstanding_cmds); blk_mq_complete_request(req, error); } @@ -165,7 +161,7 @@ static void sock_shutdown(struct nbd_device *nbd) { struct socket *sock; - spin_lock_irq(&nbd->sock_lock); + spin_lock(&nbd->sock_lock); if (!nbd->sock) { spin_unlock_irq(&nbd->sock_lock); @@ -175,24 +171,20 @@ static void sock_shutdown(struct nbd_device *nbd) sock = nbd->sock; dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); nbd->sock = NULL; - spin_unlock_irq(&nbd->sock_lock); + spin_unlock(&nbd->sock_lock); kernel_sock_shutdown(sock, SHUT_RDWR); sockfd_put(sock); - - del_timer(&nbd->timeout_timer); } -static void nbd_xmit_timeout(unsigned long arg) +static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, + bool reserved) { - struct nbd_device *nbd = (struct nbd_device *)arg; + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); + struct nbd_device *nbd = cmd->nbd; struct socket *sock = NULL; - unsigned long flags; - - if (!atomic_read(&nbd->outstanding_cmds)) - return; - spin_lock_irqsave(&nbd->sock_lock, flags); + spin_lock(&nbd->sock_lock); set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); @@ -201,13 +193,15 @@ static void nbd_xmit_timeout(unsigned long arg) get_file(sock->file); } - spin_unlock_irqrestore(&nbd->sock_lock, flags); + spin_unlock(&nbd->sock_lock); if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sockfd_put(sock); } + req->errors++; dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); + return BLK_EH_HANDLED; } /* @@ -257,9 +251,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, tsk_restore_flags(current, pflags, PF_MEMALLOC); - if (!send && nbd->xmit_timeout) - mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout); - return result; } @@ -512,10 +503,6 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd) goto error_out; } - if (nbd->xmit_timeout && !atomic_read(&nbd->outstanding_cmds)) - mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout); - - atomic_inc(&nbd->outstanding_cmds); if (nbd_send_cmd(nbd, cmd) != 0) { dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); req->errors++; @@ -569,9 +556,8 @@ static void nbd_reset(struct nbd_device *nbd) nbd->bytesize = 0; set_capacity(nbd->disk, 0); nbd->flags = 0; - nbd->xmit_timeout = 0; + nbd->tag_set.timeout = 0; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); - del_timer_sync(&nbd->timeout_timer); } static void nbd_bdev_reset(struct block_device *bdev) @@ -668,13 +654,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return nbd_size_set(nbd, bdev, nbd->blksize, arg); case NBD_SET_TIMEOUT: - nbd->xmit_timeout = arg * HZ; - if (arg) - mod_timer(&nbd->timeout_timer, - jiffies + nbd->xmit_timeout); - else - del_timer_sync(&nbd->timeout_timer); - + nbd->tag_set.timeout = arg * HZ; return 0; case NBD_SET_FLAGS: @@ -836,7 +816,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd) debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); - debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout); + debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize); debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); @@ -903,6 +883,7 @@ static struct blk_mq_ops nbd_mq_ops = { .queue_rq = nbd_queue_rq, .map_queue = blk_mq_map_queue, .init_request = nbd_init_request, + .timeout = nbd_xmit_timeout, }; /* @@ -1007,10 +988,6 @@ static int __init nbd_init(void) nbd_dev[i].magic = NBD_MAGIC; spin_lock_init(&nbd_dev[i].sock_lock); mutex_init(&nbd_dev[i].tx_lock); - init_timer(&nbd_dev[i].timeout_timer); - nbd_dev[i].timeout_timer.function = nbd_xmit_timeout; - nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i]; - atomic_set(&nbd_dev[i].outstanding_cmds, 0); disk->major = NBD_MAJOR; disk->first_minor = i << part_shift; disk->fops = &nbd_fops; -- cgit From 4382e33ad374862eacf62003bb02c750391ada05 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Sep 2016 10:45:36 +0200 Subject: block, dm-crypt, btrfs: Introduce bio_flags() Introduce the bio_flags() macro. Ensure that the second argument of bio_set_op_attrs() only contains flags and no operation. This patch does not change any functionality. Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Chris Mason (maintainer:BTRFS FILE SYSTEM) Cc: Josef Bacik (maintainer:BTRFS FILE SYSTEM) Cc: Mike Snitzer Cc: Hannes Reinecke Cc: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/dm-crypt.c | 2 +- fs/btrfs/inode.c | 5 +++-- include/linux/blk_types.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index eedba67b0e3e..9ba0f0724d28 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1136,7 +1136,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) clone->bi_private = io; clone->bi_end_io = crypt_endio; clone->bi_bdev = cc->dev->bdev; - bio_set_op_attrs(clone, bio_op(io->base_bio), io->base_bio->bi_opf); + bio_set_op_attrs(clone, bio_op(io->base_bio), bio_flags(io->base_bio)); } static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e6811c42e41e..ca01106795ea 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8412,7 +8412,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, if (!bio) return -ENOMEM; - bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf); + bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio)); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; @@ -8450,7 +8450,8 @@ next_block: start_sector, GFP_NOFS); if (!bio) goto out_err; - bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf); + bio_set_op_attrs(bio, bio_op(orig_bio), + bio_flags(orig_bio)); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1e1ef210ae91..311fa2f478b8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -90,11 +90,12 @@ struct bio { }; #define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS) +#define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1)) #define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) #define bio_set_op_attrs(bio, op, op_flags) do { \ WARN_ON(op >= (1 << REQ_OP_BITS)); \ - (bio)->bi_opf &= ((1 << BIO_OP_SHIFT) - 1); \ + (bio)->bi_opf = bio_flags(bio); \ (bio)->bi_opf |= ((unsigned int) (op) << BIO_OP_SHIFT); \ (bio)->bi_opf |= op_flags; \ } while (0) -- cgit From e105ddb4a2a13d779311349df2c32fa22a87c406 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Sep 2016 14:25:03 +0200 Subject: lightnvm: NVM should depend on HAS_DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If NO_DMA=y: drivers/built-in.o: In function `nvme_nvm_dev_dma_free': lightnvm.c:(.text+0x23df1a): undefined reference to `dma_pool_free' drivers/built-in.o: In function `nvme_nvm_dev_dma_alloc': lightnvm.c:(.text+0x23df38): undefined reference to `dma_pool_alloc' drivers/built-in.o: In function `nvme_nvm_destroy_dma_pool': lightnvm.c:(.text+0x23df4c): undefined reference to `dma_pool_destroy' drivers/built-in.o: In function `nvme_nvm_create_dma_pool': lightnvm.c:(.text+0x23df7e): undefined reference to `dma_pool_create' and ERROR: "dma_pool_destroy" [drivers/nvme/host/nvme-core.ko] undefined! ERROR: "dma_pool_free" [drivers/nvme/host/nvme-core.ko] undefined! ERROR: "dma_pool_alloc" [drivers/nvme/host/nvme-core.ko] undefined! ERROR: "dma_pool_create" [drivers/nvme/host/nvme-core.ko] undefined! Signed-off-by: Geert Uytterhoeven Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 61c68a1f054a..2f5d5f4a4c75 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -4,7 +4,7 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK + depends on BLOCK && HAS_DMA help Say Y here to get to enable Open-channel SSDs. -- cgit From ac81bfa9867103c9d50996ec21fa9179b81b727e Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 16 Sep 2016 14:25:04 +0200 Subject: nvme: refactor namespaces to support non-gendisk devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With LightNVM enabled namespaces, the gendisk structure is not exposed to the user. This prevents LightNVM users from accessing the NVMe device driver specific sysfs entries, and LightNVM namespace geometry. Refactor the revalidation process, so that a namespace, instead of a gendisk, is revalidated. This later allows patches to wire up the sysfs entries up to a non-gendisk namespace. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 2 + drivers/nvme/host/core.c | 114 +++++++++++++++++++++++++++---------------- drivers/nvme/host/lightnvm.c | 5 +- 3 files changed, 78 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 9ebd2cfbd849..25c5df920326 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -581,6 +581,8 @@ static int nvm_core_init(struct nvm_dev *dev) mutex_init(&dev->mlock); spin_lock_init(&dev->lock); + blk_queue_logical_block_size(dev->q, dev->sec_size); + return 0; err_fmtype: kfree(dev->lun_map); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2feacc70bf61..2c3da3315a02 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -888,42 +888,33 @@ static void nvme_config_discard(struct nvme_ns *ns) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); } -static int nvme_revalidate_disk(struct gendisk *disk) +static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) { - struct nvme_ns *ns = disk->private_data; - struct nvme_id_ns *id; - u8 lbaf, pi_type; - u16 old_ms; - unsigned short bs; - - if (test_bit(NVME_NS_DEAD, &ns->flags)) { - set_capacity(disk, 0); - return -ENODEV; - } - if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { + if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) { dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n", __func__); return -ENODEV; } - if (id->ncap == 0) { - kfree(id); - return -ENODEV; - } - if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { - if (nvme_nvm_register(ns->queue, disk->disk_name)) { - dev_warn(disk_to_dev(ns->disk), - "%s: LightNVM init failure\n", __func__); - kfree(id); - return -ENODEV; - } - ns->type = NVME_NS_LIGHTNVM; + if ((*id)->ncap == 0) { + kfree(*id); + return -ENODEV; } if (ns->ctrl->vs >= NVME_VS(1, 1)) - memcpy(ns->eui, id->eui64, sizeof(ns->eui)); + memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); if (ns->ctrl->vs >= NVME_VS(1, 2)) - memcpy(ns->uuid, id->nguid, sizeof(ns->uuid)); + memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid)); + + return 0; +} + +static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) +{ + struct nvme_ns *ns = disk->private_data; + u8 lbaf, pi_type; + u16 old_ms; + unsigned short bs; old_ms = ns->ms; lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; @@ -962,8 +953,26 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); blk_mq_unfreeze_queue(disk->queue); +} +static int nvme_revalidate_disk(struct gendisk *disk) +{ + struct nvme_ns *ns = disk->private_data; + struct nvme_id_ns *id = NULL; + int ret; + + if (test_bit(NVME_NS_DEAD, &ns->flags)) { + set_capacity(disk, 0); + return -ENODEV; + } + + ret = nvme_revalidate_ns(ns, &id); + if (ret) + return ret; + + __nvme_revalidate_disk(disk, id); kfree(id); + return 0; } @@ -1642,6 +1651,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; struct gendisk *disk; + struct nvme_id_ns *id; + char disk_name[DISK_NAME_LEN]; int node = dev_to_node(ctrl->dev); ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); @@ -1659,33 +1670,54 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->queue->queuedata = ns; ns->ctrl = ctrl; - disk = alloc_disk_node(0, node); - if (!disk) - goto out_free_queue; - kref_init(&ns->kref); ns->ns_id = nsid; - ns->disk = disk; ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ - blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); - disk->fops = &nvme_fops; - disk->private_data = ns; - disk->queue = ns->queue; - disk->flags = GENHD_FL_EXT_DEVT; - sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance); + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); - if (nvme_revalidate_disk(ns->disk)) - goto out_free_disk; + if (nvme_revalidate_ns(ns, &id)) + goto out_free_queue; + + if (nvme_nvm_ns_supported(ns, id)) { + if (nvme_nvm_register(ns->queue, disk_name)) { + dev_warn(ctrl->dev, + "%s: LightNVM init failure\n", __func__); + goto out_free_id; + } + + disk = alloc_disk_node(0, node); + if (!disk) + goto out_free_id; + memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); + ns->disk = disk; + ns->type = NVME_NS_LIGHTNVM; + } else { + disk = alloc_disk_node(0, node); + if (!disk) + goto out_free_id; + + disk->fops = &nvme_fops; + disk->private_data = ns; + disk->queue = ns->queue; + disk->flags = GENHD_FL_EXT_DEVT; + memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); + ns->disk = disk; + + __nvme_revalidate_disk(disk, id); + } mutex_lock(&ctrl->namespaces_mutex); list_add_tail(&ns->list, &ctrl->namespaces); mutex_unlock(&ctrl->namespaces_mutex); kref_get(&ctrl->kref); + + kfree(id); + if (ns->type == NVME_NS_LIGHTNVM) return; @@ -1695,8 +1727,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) pr_warn("%s: failed to create sysfs group for identification\n", ns->disk->disk_name); return; - out_free_disk: - kfree(disk); + out_free_id: + kfree(id); out_free_queue: blk_cleanup_queue(ns->queue); out_release_instance: diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 63f483daf930..7268a7a1a19a 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -474,8 +474,9 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD) - c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns, - rqd->bio->bi_iter.bi_sector)); + /* momentarily hardcode the shift configuration. lba_shift from + * nvm_dev will be available in a follow-up patch */ + c->hb_rw.slba = cpu_to_le64(rqd->bio->bi_iter.bi_sector >> 3); } static void nvme_nvm_end_io(struct request *rq, int error) -- cgit From 9ae2d0aa5046c67dd37cf4b70998ad296e718835 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 16 Sep 2016 14:25:05 +0200 Subject: null_blk: refactor to support non-gendisk devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With LightNVM enabled devices, the gendisk structure is not exposed to the user. This hides the device driver specific sysfs entries, and prevents binding of LightNVM geometry information to the device. Refactor the device registration process, so that gendisk and non-gendisk devices are easily managed. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 110 ++++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 49 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 75a7f88d6717..895867a8a783 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -414,23 +414,6 @@ static void cleanup_queues(struct nullb *nullb) kfree(nullb->queues); } -static void null_del_dev(struct nullb *nullb) -{ - list_del_init(&nullb->list); - - if (use_lightnvm) - nvm_unregister(nullb->disk_name); - else - del_gendisk(nullb->disk); - blk_cleanup_queue(nullb->q); - if (queue_mode == NULL_Q_MQ) - blk_mq_free_tag_set(&nullb->tag_set); - if (!use_lightnvm) - put_disk(nullb->disk); - cleanup_queues(nullb); - kfree(nullb); -} - #ifdef CONFIG_NVM static void null_lnvm_end_io(struct request *rq, int error) @@ -564,10 +547,41 @@ static struct nvm_dev_ops null_lnvm_dev_ops = { /* Simulate nvme protocol restriction */ .max_phys_sect = 64, }; + +static int null_nvm_register(struct nullb *nullb) +{ + return nvm_register(nullb->q, nullb->disk_name, &null_lnvm_dev_ops); +} + +static void null_nvm_unregister(struct nullb *nullb) +{ + nvm_unregister(nullb->disk_name); +} #else -static struct nvm_dev_ops null_lnvm_dev_ops; +static int null_nvm_register(struct nullb *nullb) +{ + return -EINVAL; +} +static void null_nvm_unregister(struct nullb *nullb) {} #endif /* CONFIG_NVM */ +static void null_del_dev(struct nullb *nullb) +{ + list_del_init(&nullb->list); + + if (use_lightnvm) + null_nvm_unregister(nullb); + else + del_gendisk(nullb->disk); + blk_cleanup_queue(nullb->q); + if (queue_mode == NULL_Q_MQ) + blk_mq_free_tag_set(&nullb->tag_set); + if (!use_lightnvm) + put_disk(nullb->disk); + cleanup_queues(nullb); + kfree(nullb); +} + static int null_open(struct block_device *bdev, fmode_t mode) { return 0; @@ -640,11 +654,32 @@ static int init_driver_queues(struct nullb *nullb) return 0; } -static int null_add_dev(void) +static int null_gendisk_register(struct nullb *nullb) { struct gendisk *disk; - struct nullb *nullb; sector_t size; + + disk = nullb->disk = alloc_disk_node(1, home_node); + if (!disk) + return -ENOMEM; + size = gb * 1024 * 1024 * 1024ULL; + set_capacity(disk, size >> 9); + + disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; + disk->major = null_major; + disk->first_minor = nullb->index; + disk->fops = &null_fops; + disk->private_data = nullb; + disk->queue = nullb->q; + strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + + add_disk(disk); + return 0; +} + +static int null_add_dev(void) +{ + struct nullb *nullb; int rv; nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); @@ -716,42 +751,19 @@ static int null_add_dev(void) sprintf(nullb->disk_name, "nullb%d", nullb->index); - if (use_lightnvm) { - rv = nvm_register(nullb->q, nullb->disk_name, - &null_lnvm_dev_ops); - if (rv) - goto out_cleanup_blk_queue; - goto done; - } - - disk = nullb->disk = alloc_disk_node(1, home_node); - if (!disk) { - rv = -ENOMEM; - goto out_cleanup_lightnvm; - } - size = gb * 1024 * 1024 * 1024ULL; - set_capacity(disk, size >> 9); - - disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; - disk->major = null_major; - disk->first_minor = nullb->index; - disk->fops = &null_fops; - disk->private_data = nullb; - disk->queue = nullb->q; - strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + if (use_lightnvm) + rv = null_nvm_register(nullb); + else + rv = null_gendisk_register(nullb); - add_disk(disk); + if (rv) + goto out_cleanup_blk_queue; -done: mutex_lock(&lock); list_add_tail(&nullb->list, &nullb_list); mutex_unlock(&lock); return 0; - -out_cleanup_lightnvm: - if (use_lightnvm) - nvm_unregister(nullb->disk_name); out_cleanup_blk_queue: blk_cleanup_queue(nullb->q); out_cleanup_tags: -- cgit From b21d5b301794ae332eaa6e177d71fe8b77d3664c Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 16 Sep 2016 14:25:06 +0200 Subject: blk-mq: register device instead of disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable devices without a gendisk instance to register itself with blk-mq and expose the associated multi-queue sysfs entries. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 17 +++++++---------- block/blk-sysfs.c | 4 ++-- drivers/md/dm-rq.c | 2 +- include/linux/blk-mq.h | 4 ++-- 4 files changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 3c385b196bc7..01fb455d3377 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -393,9 +393,8 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) return ret; } -static void __blk_mq_unregister_disk(struct gendisk *disk) +static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { - struct request_queue *q = disk->queue; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; int i, j; @@ -413,15 +412,15 @@ static void __blk_mq_unregister_disk(struct gendisk *disk) kobject_del(&q->mq_kobj); kobject_put(&q->mq_kobj); - kobject_put(&disk_to_dev(disk)->kobj); + kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; } -void blk_mq_unregister_disk(struct gendisk *disk) +void blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { blk_mq_disable_hotplug(); - __blk_mq_unregister_disk(disk); + __blk_mq_unregister_dev(dev, q); blk_mq_enable_hotplug(); } @@ -443,10 +442,8 @@ static void blk_mq_sysfs_init(struct request_queue *q) } } -int blk_mq_register_disk(struct gendisk *disk) +int blk_mq_register_dev(struct device *dev, struct request_queue *q) { - struct device *dev = disk_to_dev(disk); - struct request_queue *q = disk->queue; struct blk_mq_hw_ctx *hctx; int ret, i; @@ -467,7 +464,7 @@ int blk_mq_register_disk(struct gendisk *disk) } if (ret) - __blk_mq_unregister_disk(disk); + __blk_mq_unregister_dev(dev, q); else q->mq_sysfs_init_done = true; out: @@ -475,7 +472,7 @@ out: return ret; } -EXPORT_SYMBOL_GPL(blk_mq_register_disk); +EXPORT_SYMBOL_GPL(blk_mq_register_dev); void blk_mq_sysfs_unregister(struct request_queue *q) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index f87a7e747d36..9cc8d7c5439a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -704,7 +704,7 @@ int blk_register_queue(struct gendisk *disk) kobject_uevent(&q->kobj, KOBJ_ADD); if (q->mq_ops) - blk_mq_register_disk(disk); + blk_mq_register_dev(dev, q); if (!q->request_fn) return 0; @@ -729,7 +729,7 @@ void blk_unregister_queue(struct gendisk *disk) return; if (q->mq_ops) - blk_mq_unregister_disk(disk); + blk_mq_unregister_dev(disk_to_dev(disk), q); if (q->request_fn) elv_unregister_queue(q); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 1ca7463e8bb2..ee48230a2952 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -955,7 +955,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) dm_init_md_queue(md); /* backfill 'mq' sysfs registration normally done in blk_register_queue */ - blk_mq_register_disk(md->disk); + blk_mq_register_dev(disk_to_dev(md->disk), q); return 0; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2575779cf13f..fbcfdf323243 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -175,8 +175,8 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -int blk_mq_register_disk(struct gendisk *); -void blk_mq_unregister_disk(struct gendisk *); +int blk_mq_register_dev(struct device *, struct request_queue *); +void blk_mq_unregister_dev(struct device *, struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); -- cgit From b0b4e09c1ae71c4ec33df0616b830ae050006e9b Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 16 Sep 2016 14:25:07 +0200 Subject: lightnvm: control life of nvm_dev in driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LightNVM compatible device drivers does not have a method to expose LightNVM specific sysfs entries. To enable LightNVM sysfs entries to be exposed, lightnvm device drivers require a struct device to attach it to. To allow both the actual device driver and lightnvm sysfs entries to coexist, the device driver tracks the lifetime of the nvm_dev structure. This patch refactors NVMe and null_blk to handle the lifetime of struct nvm_dev, which eliminates the need for struct gendisk when a lightnvm compatible device is provided. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 22 ++++++++++++++++++++-- drivers/lightnvm/core.c | 35 ++++++++--------------------------- drivers/nvme/host/core.c | 36 +++++++++++++++--------------------- drivers/nvme/host/lightnvm.c | 31 ++++++++++++++++++++++++------- drivers/nvme/host/nvme.h | 12 +++++++----- include/linux/lightnvm.h | 15 +++++++++------ 6 files changed, 83 insertions(+), 68 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 895867a8a783..91e1de898daf 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -34,6 +34,7 @@ struct nullb { unsigned int index; struct request_queue *q; struct gendisk *disk; + struct nvm_dev *ndev; struct blk_mq_tag_set tag_set; struct hrtimer timer; unsigned int queue_depth; @@ -550,12 +551,29 @@ static struct nvm_dev_ops null_lnvm_dev_ops = { static int null_nvm_register(struct nullb *nullb) { - return nvm_register(nullb->q, nullb->disk_name, &null_lnvm_dev_ops); + struct nvm_dev *dev; + int rv; + + dev = nvm_alloc_dev(0); + if (!dev) + return -ENOMEM; + + dev->q = nullb->q; + memcpy(dev->name, nullb->disk_name, DISK_NAME_LEN); + dev->ops = &null_lnvm_dev_ops; + + rv = nvm_register(dev); + if (rv) { + kfree(dev); + return rv; + } + nullb->ndev = dev; + return 0; } static void null_nvm_unregister(struct nullb *nullb) { - nvm_unregister(nullb->disk_name); + nvm_unregister(nullb->ndev); } #else static int null_nvm_register(struct nullb *nullb) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 25c5df920326..a99b59d1eb36 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -660,22 +660,15 @@ static void nvm_exit(struct nvm_dev *dev) pr_info("nvm: successfully unloaded\n"); } -int nvm_register(struct request_queue *q, char *disk_name, - struct nvm_dev_ops *ops) +struct nvm_dev *nvm_alloc_dev(int node) { - struct nvm_dev *dev; - int ret; - - if (!ops->identity) - return -EINVAL; - - dev = kzalloc(sizeof(struct nvm_dev), GFP_KERNEL); - if (!dev) - return -ENOMEM; + return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); +} +EXPORT_SYMBOL(nvm_alloc_dev); - dev->q = q; - dev->ops = ops; - strncpy(dev->name, disk_name, DISK_NAME_LEN); +int nvm_register(struct nvm_dev *dev) +{ + int ret; ret = nvm_init(dev); if (ret) @@ -714,29 +707,17 @@ int nvm_register(struct request_queue *q, char *disk_name, return 0; err_init: kfree(dev->lun_map); - kfree(dev); return ret; } EXPORT_SYMBOL(nvm_register); -void nvm_unregister(char *disk_name) +void nvm_unregister(struct nvm_dev *dev) { - struct nvm_dev *dev; - down_write(&nvm_lock); - dev = nvm_find_nvm_dev(disk_name); - if (!dev) { - pr_err("nvm: could not find device %s to unregister\n", - disk_name); - up_write(&nvm_lock); - return; - } - list_del(&dev->devices); up_write(&nvm_lock); nvm_exit(dev); - kfree(dev); } EXPORT_SYMBOL(nvm_unregister); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2c3da3315a02..3c707d83b1da 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -156,12 +156,14 @@ static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); - if (ns->type == NVME_NS_LIGHTNVM) - nvme_nvm_unregister(ns->queue, ns->disk->disk_name); + if (ns->ndev) + nvme_nvm_unregister(ns); - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); + if (ns->disk) { + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + } put_disk(ns->disk); ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); @@ -891,8 +893,7 @@ static void nvme_config_discard(struct nvme_ns *ns) static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) { if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) { - dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n", - __func__); + dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__); return -ENODEV; } @@ -1683,18 +1684,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) goto out_free_queue; if (nvme_nvm_ns_supported(ns, id)) { - if (nvme_nvm_register(ns->queue, disk_name)) { - dev_warn(ctrl->dev, - "%s: LightNVM init failure\n", __func__); + if (nvme_nvm_register(ns, disk_name, node)) { + dev_warn(ctrl->dev, "%s: LightNVM init failure\n", + __func__); goto out_free_id; } - - disk = alloc_disk_node(0, node); - if (!disk) - goto out_free_id; - memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); - ns->disk = disk; - ns->type = NVME_NS_LIGHTNVM; } else { disk = alloc_disk_node(0, node); if (!disk) @@ -1718,7 +1712,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) kfree(id); - if (ns->type == NVME_NS_LIGHTNVM) + if (ns->ndev) return; device_add_disk(ctrl->device, ns->disk); @@ -1742,7 +1736,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; - if (ns->disk->flags & GENHD_FL_UP) { + if (ns->disk && ns->disk->flags & GENHD_FL_UP) { if (blk_get_integrity(ns->disk)) blk_integrity_unregister(ns->disk); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, @@ -1765,7 +1759,7 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns = nvme_find_get_ns(ctrl, nsid); if (ns) { - if (revalidate_disk(ns->disk)) + if (ns->disk && revalidate_disk(ns->disk)) nvme_ns_remove(ns); nvme_put_ns(ns); } else @@ -2070,7 +2064,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) * Revalidating a dead namespace sets capacity to 0. This will * end buffered writers dirtying pages that can't be synced. */ - if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 7268a7a1a19a..798fcd9f5d1f 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -474,9 +474,8 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD) - /* momentarily hardcode the shift configuration. lba_shift from - * nvm_dev will be available in a follow-up patch */ - c->hb_rw.slba = cpu_to_le64(rqd->bio->bi_iter.bi_sector >> 3); + c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns, + rqd->bio->bi_iter.bi_sector)); } static void nvme_nvm_end_io(struct request *rq, int error) @@ -593,14 +592,32 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .max_phys_sect = 64, }; -int nvme_nvm_register(struct request_queue *q, char *disk_name) +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { - return nvm_register(q, disk_name, &nvme_nvm_dev_ops); + struct request_queue *q = ns->queue; + struct nvm_dev *dev; + int ret; + + dev = nvm_alloc_dev(node); + if (!dev) + return -ENOMEM; + + dev->q = q; + memcpy(dev->name, disk_name, DISK_NAME_LEN); + dev->ops = &nvme_nvm_dev_ops; + ns->ndev = dev; + + ret = nvm_register(dev); + + ns->lba_shift = ilog2(dev->sec_size) - 9; + + return ret; } -void nvme_nvm_unregister(struct request_queue *q, char *disk_name) +void nvme_nvm_unregister(struct nvme_ns *ns) { - nvm_unregister(disk_name); + nvm_unregister(ns->ndev); + kfree(ns->ndev); } /* move to shared place when used in multiple places. */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ab18b78102bf..e0535c14e538 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -18,6 +18,7 @@ #include #include #include +#include enum { /* @@ -154,6 +155,7 @@ struct nvme_ns { struct nvme_ctrl *ctrl; struct request_queue *queue; struct gendisk *disk; + struct nvm_dev *ndev; struct kref kref; int instance; @@ -165,7 +167,6 @@ struct nvme_ns { u16 ms; bool ext; u8 pi_type; - int type; unsigned long flags; #define NVME_NS_REMOVING 0 @@ -307,15 +308,16 @@ int nvme_sg_get_version_num(int __user *ip); #ifdef CONFIG_NVM int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); -int nvme_nvm_register(struct request_queue *q, char *disk_name); -void nvme_nvm_unregister(struct request_queue *q, char *disk_name); +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); +void nvme_nvm_unregister(struct nvme_ns *ns); #else -static inline int nvme_nvm_register(struct request_queue *q, char *disk_name) +static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, + int node) { return 0; } -static inline void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {}; +static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) { diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ba78b8306674..5afc2634f332 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -524,9 +524,9 @@ extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *, unsigned long); extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *); -extern int nvm_register(struct request_queue *, char *, - struct nvm_dev_ops *); -extern void nvm_unregister(char *); +extern struct nvm_dev *nvm_alloc_dev(int); +extern int nvm_register(struct nvm_dev *); +extern void nvm_unregister(struct nvm_dev *); void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type); @@ -575,11 +575,14 @@ extern int nvm_dev_factory(struct nvm_dev *, int flags); #else /* CONFIG_NVM */ struct nvm_dev_ops; -static inline int nvm_register(struct request_queue *q, char *disk_name, - struct nvm_dev_ops *ops) +static inline struct nvm_dev *nvm_alloc_dev(int node) +{ + return ERR_PTR(-EINVAL); +} +static inline int nvm_register(struct nvm_dev *dev) { return -EINVAL; } -static inline void nvm_unregister(char *disk_name) {} +static inline void nvm_unregister(struct nvm_dev *dev) {} #endif /* CONFIG_NVM */ #endif /* LIGHTNVM.H */ -- cgit From 40267efddc296190d50c61d96daf277151447cf6 Mon Sep 17 00:00:00 2001 From: "Simon A. F. Lund" Date: Fri, 16 Sep 2016 14:25:08 +0200 Subject: lightnvm: expose device geometry through sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a host to access an Open-Channel SSD, it has to know its geometry, so that it writes and reads at the appropriate device bounds. Currently, the geometry information is kept within the kernel, and not exported to user-space for consumption. This patch exposes the configuration through sysfs and enables user-space libraries, such as liblightnvm, to use the sysfs implementation to get the geometry of an Open-Channel SSD. The sysfs entries are stored within the device hierarchy, and can be found using the "lightnvm" device type. An example configuration looks like this: /sys/class/nvme/ └── nvme0n1 ├── capabilities: 3 ├── device_mode: 1 ├── erase_max: 1000000 ├── erase_typ: 1000000 ├── flash_media_type: 0 ├── media_capabilities: 0x00000001 ├── media_type: 0 ├── multiplane: 0x00010101 ├── num_blocks: 1022 ├── num_channels: 1 ├── num_luns: 4 ├── num_pages: 64 ├── num_planes: 1 ├── page_size: 4096 ├── prog_max: 100000 ├── prog_typ: 100000 ├── read_max: 10000 ├── read_typ: 10000 ├── sector_oob_size: 0 ├── sector_size: 4096 ├── media_manager: gennvm ├── ppa_format: 0x380830082808001010102008 ├── vendor_opcode: 0 ├── max_phys_secs: 64 └── version: 1 Signed-off-by: Simon A. F. Lund Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/Makefile | 2 +- drivers/lightnvm/core.c | 20 +++-- drivers/lightnvm/lightnvm.h | 35 ++++++++ drivers/lightnvm/sysfs.c | 195 +++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/core.c | 13 +-- drivers/nvme/host/lightnvm.c | 9 +- drivers/nvme/host/nvme.h | 18 +++- include/linux/lightnvm.h | 3 + 8 files changed, 278 insertions(+), 17 deletions(-) create mode 100644 drivers/lightnvm/lightnvm.h create mode 100644 drivers/lightnvm/sysfs.c (limited to 'drivers') diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index a7a0a22cf1a5..1f6b6521016a 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile @@ -2,6 +2,6 @@ # Makefile for Open-Channel SSDs. # -obj-$(CONFIG_NVM) := core.o sysblk.o +obj-$(CONFIG_NVM) := core.o sysblk.o sysfs.o obj-$(CONFIG_NVM_GENNVM) += gennvm.o obj-$(CONFIG_NVM_RRPC) += rrpc.o diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index a99b59d1eb36..a2393e1ef82e 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -27,6 +27,8 @@ #include #include +#include "lightnvm.h" + static LIST_HEAD(nvm_tgt_types); static DECLARE_RWSEM(nvm_tgtt_lock); static LIST_HEAD(nvm_mgrs); @@ -598,15 +600,19 @@ static void nvm_free_mgr(struct nvm_dev *dev) dev->mt = NULL; } -static void nvm_free(struct nvm_dev *dev) +void nvm_free(struct nvm_dev *dev) { if (!dev) return; nvm_free_mgr(dev); + if (dev->dma_pool) + dev->ops->destroy_dma_pool(dev->dma_pool); + kfree(dev->lptbl); kfree(dev->lun_map); + kfree(dev); } static int nvm_init(struct nvm_dev *dev) @@ -653,11 +659,7 @@ err: static void nvm_exit(struct nvm_dev *dev) { - if (dev->dma_pool) - dev->ops->destroy_dma_pool(dev->dma_pool); - nvm_free(dev); - - pr_info("nvm: successfully unloaded\n"); + nvm_sysfs_unregister_dev(dev); } struct nvm_dev *nvm_alloc_dev(int node) @@ -689,6 +691,10 @@ int nvm_register(struct nvm_dev *dev) } } + ret = nvm_sysfs_register_dev(dev); + if (ret) + goto err_ppalist; + if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { ret = nvm_get_sysblock(dev, &dev->sb); if (!ret) @@ -705,6 +711,8 @@ int nvm_register(struct nvm_dev *dev) up_write(&nvm_lock); return 0; +err_ppalist: + dev->ops->destroy_dma_pool(dev->dma_pool); err_init: kfree(dev->lun_map); return ret; diff --git a/drivers/lightnvm/lightnvm.h b/drivers/lightnvm/lightnvm.h new file mode 100644 index 000000000000..93f1aacc9f02 --- /dev/null +++ b/drivers/lightnvm/lightnvm.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2016 CNEX Labs. All rights reserved. + * Initial release: Matias Bjorling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, + * USA. + * + */ + +#ifndef LIGHTNVM_H +#define LIGHTNVM_H + +#include + +/* core -> sysfs.c */ +int nvm_sysfs_register_dev(struct nvm_dev *); +void nvm_sysfs_unregister_dev(struct nvm_dev *); +int nvm_sysfs_register(void); +void nvm_sysfs_unregister(void); + +/* sysfs > core */ +void nvm_free(struct nvm_dev *); + +#endif diff --git a/drivers/lightnvm/sysfs.c b/drivers/lightnvm/sysfs.c new file mode 100644 index 000000000000..72ad089c0269 --- /dev/null +++ b/drivers/lightnvm/sysfs.c @@ -0,0 +1,195 @@ +#include +#include +#include +#include +#include + +#include "lightnvm.h" + +static ssize_t nvm_dev_attr_show(struct device *dev, + struct device_attribute *dattr, char *page) +{ + struct nvm_dev *ndev = container_of(dev, struct nvm_dev, dev); + struct nvm_id *id = &ndev->identity; + struct nvm_id_group *grp = &id->groups[0]; + struct attribute *attr = &dattr->attr; + + if (strcmp(attr->name, "version") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id); + } else if (strcmp(attr->name, "vendor_opcode") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt); + } else if (strcmp(attr->name, "capabilities") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); + } else if (strcmp(attr->name, "device_mode") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); + } else if (strcmp(attr->name, "media_manager") == 0) { + if (!ndev->mt) + return scnprintf(page, PAGE_SIZE, "%s\n", "none"); + return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name); + } else if (strcmp(attr->name, "ppa_format") == 0) { + return scnprintf(page, PAGE_SIZE, + "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + id->ppaf.ch_offset, id->ppaf.ch_len, + id->ppaf.lun_offset, id->ppaf.lun_len, + id->ppaf.pln_offset, id->ppaf.pln_len, + id->ppaf.blk_offset, id->ppaf.blk_len, + id->ppaf.pg_offset, id->ppaf.pg_len, + id->ppaf.sect_offset, id->ppaf.sect_len); + } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ + return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype); + } else if (strcmp(attr->name, "flash_media_type") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype); + } else if (strcmp(attr->name, "num_channels") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch); + } else if (strcmp(attr->name, "num_luns") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun); + } else if (strcmp(attr->name, "num_planes") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln); + } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk); + } else if (strcmp(attr->name, "num_pages") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg); + } else if (strcmp(attr->name, "page_size") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz); + } else if (strcmp(attr->name, "hw_sector_size") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs); + } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ + return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos); + } else if (strcmp(attr->name, "read_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt); + } else if (strcmp(attr->name, "read_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm); + } else if (strcmp(attr->name, "prog_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt); + } else if (strcmp(attr->name, "prog_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm); + } else if (strcmp(attr->name, "erase_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet); + } else if (strcmp(attr->name, "erase_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem); + } else if (strcmp(attr->name, "multiplane_modes") == 0) { + return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos); + } else if (strcmp(attr->name, "media_capabilities") == 0) { + return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap); + } else if (strcmp(attr->name, "max_phys_secs") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", + ndev->ops->max_phys_sect); + } else { + return scnprintf(page, + PAGE_SIZE, + "Unhandled attr(%s) in `nvm_dev_attr_show`\n", + attr->name); + } +} + +#define NVM_DEV_ATTR_RO(_name) \ + DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) + +static NVM_DEV_ATTR_RO(version); +static NVM_DEV_ATTR_RO(vendor_opcode); +static NVM_DEV_ATTR_RO(capabilities); +static NVM_DEV_ATTR_RO(device_mode); +static NVM_DEV_ATTR_RO(ppa_format); +static NVM_DEV_ATTR_RO(media_manager); + +static NVM_DEV_ATTR_RO(media_type); +static NVM_DEV_ATTR_RO(flash_media_type); +static NVM_DEV_ATTR_RO(num_channels); +static NVM_DEV_ATTR_RO(num_luns); +static NVM_DEV_ATTR_RO(num_planes); +static NVM_DEV_ATTR_RO(num_blocks); +static NVM_DEV_ATTR_RO(num_pages); +static NVM_DEV_ATTR_RO(page_size); +static NVM_DEV_ATTR_RO(hw_sector_size); +static NVM_DEV_ATTR_RO(oob_sector_size); +static NVM_DEV_ATTR_RO(read_typ); +static NVM_DEV_ATTR_RO(read_max); +static NVM_DEV_ATTR_RO(prog_typ); +static NVM_DEV_ATTR_RO(prog_max); +static NVM_DEV_ATTR_RO(erase_typ); +static NVM_DEV_ATTR_RO(erase_max); +static NVM_DEV_ATTR_RO(multiplane_modes); +static NVM_DEV_ATTR_RO(media_capabilities); +static NVM_DEV_ATTR_RO(max_phys_secs); + +#define NVM_DEV_ATTR(_name) (dev_attr_##_name##) + +static struct attribute *nvm_dev_attrs[] = { + &dev_attr_version.attr, + &dev_attr_vendor_opcode.attr, + &dev_attr_capabilities.attr, + &dev_attr_device_mode.attr, + &dev_attr_media_manager.attr, + + &dev_attr_ppa_format.attr, + &dev_attr_media_type.attr, + &dev_attr_flash_media_type.attr, + &dev_attr_num_channels.attr, + &dev_attr_num_luns.attr, + &dev_attr_num_planes.attr, + &dev_attr_num_blocks.attr, + &dev_attr_num_pages.attr, + &dev_attr_page_size.attr, + &dev_attr_hw_sector_size.attr, + &dev_attr_oob_sector_size.attr, + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + &dev_attr_prog_typ.attr, + &dev_attr_prog_max.attr, + &dev_attr_erase_typ.attr, + &dev_attr_erase_max.attr, + &dev_attr_multiplane_modes.attr, + &dev_attr_media_capabilities.attr, + &dev_attr_max_phys_secs.attr, + NULL, +}; + +static struct attribute_group nvm_dev_attr_group = { + .name = "lightnvm", + .attrs = nvm_dev_attrs, +}; + +static const struct attribute_group *nvm_dev_attr_groups[] = { + &nvm_dev_attr_group, + NULL, +}; + +static void nvm_dev_release(struct device *device) +{ + struct nvm_dev *dev = container_of(device, struct nvm_dev, dev); + struct request_queue *q = dev->q; + + pr_debug("nvm/sysfs: `nvm_dev_release`\n"); + + blk_mq_unregister_dev(device, q); + + nvm_free(dev); +} + +static struct device_type nvm_type = { + .name = "lightnvm", + .groups = nvm_dev_attr_groups, + .release = nvm_dev_release, +}; + +int nvm_sysfs_register_dev(struct nvm_dev *dev) +{ + if (!dev->parent_dev) + return 0; + + dev->dev.parent = dev->parent_dev; + dev_set_name(&dev->dev, "%s", dev->name); + dev->dev.type = &nvm_type; + device_initialize(&dev->dev); + device_add(&dev->dev); + + blk_mq_register_dev(&dev->dev, dev->q); + + return 0; +} + +void nvm_sysfs_unregister_dev(struct nvm_dev *dev) +{ + if (dev && dev->parent_dev) + kobject_put(&dev->dev.kobj); +} diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3c707d83b1da..bd2156cbfc6c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1435,7 +1435,7 @@ static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); struct nvme_ctrl *ctrl = ns->ctrl; int serial_len = sizeof(ctrl->serial); int model_len = sizeof(ctrl->model); @@ -1459,7 +1459,7 @@ static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); return sprintf(buf, "%pU\n", ns->uuid); } static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); @@ -1467,7 +1467,7 @@ static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); return sprintf(buf, "%8phd\n", ns->eui); } static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); @@ -1475,7 +1475,7 @@ static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); return sprintf(buf, "%d\n", ns->ns_id); } static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL); @@ -1492,7 +1492,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, struct device, kobj); - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); if (a == &dev_attr_uuid.attr) { if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) @@ -1684,7 +1684,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) goto out_free_queue; if (nvme_nvm_ns_supported(ns, id)) { - if (nvme_nvm_register(ns, disk_name, node)) { + if (nvme_nvm_register(ns, disk_name, node, + &nvme_ns_attr_group)) { dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__); goto out_free_id; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 798fcd9f5d1f..f5e3011e31fc 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -592,7 +592,8 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .max_phys_sect = 64, }; -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, + const struct attribute_group *attrs) { struct request_queue *q = ns->queue; struct nvm_dev *dev; @@ -605,19 +606,23 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) dev->q = q; memcpy(dev->name, disk_name, DISK_NAME_LEN); dev->ops = &nvme_nvm_dev_ops; + dev->parent_dev = ns->ctrl->device; + dev->private_data = ns; ns->ndev = dev; ret = nvm_register(dev); ns->lba_shift = ilog2(dev->sec_size) - 9; + if (sysfs_create_group(&dev->dev.kobj, attrs)) + pr_warn("%s: failed to create sysfs group for identification\n", + disk_name); return ret; } void nvme_nvm_unregister(struct nvme_ns *ns) { nvm_unregister(ns->ndev); - kfree(ns->ndev); } /* move to shared place when used in multiple places. */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index e0535c14e538..bfd25dd73bca 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -308,11 +308,21 @@ int nvme_sg_get_version_num(int __user *ip); #ifdef CONFIG_NVM int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, + const struct attribute_group *attrs); void nvme_nvm_unregister(struct nvme_ns *ns); + +static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) +{ + if (dev->type->devnode) + return dev_to_disk(dev)->private_data; + + return (container_of(dev, struct nvm_dev, dev))->private_data; +} #else static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, - int node) + int node, + const struct attribute_group *attrs) { return 0; } @@ -323,6 +333,10 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i { return 0; } +static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) +{ + return dev_to_disk(dev)->private_data; +} #endif /* CONFIG_NVM */ int __init nvme_core_init(void); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5afc2634f332..d190786e4ad8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -352,7 +352,10 @@ struct nvm_dev { /* Backend device */ struct request_queue *q; + struct device dev; + struct device *parent_dev; char name[DISK_NAME_LEN]; + void *private_data; struct mutex mlock; spinlock_t lock; -- cgit From 1e3aeae4ea710023dda2a6b780183ee371d1a796 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Sep 2016 14:25:09 +0200 Subject: lightnvm: propagate device_add() error code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit device_add() may fail, and all callers are supposed to check the return value, but one new user in lightnvm doesn't: drivers/lightnvm/sysfs.c: In function 'nvm_sysfs_register_dev': drivers/lightnvm/sysfs.c:184:2: error: ignoring return value of 'device_add', declared with attribute warn_unused_result [-Werror=unused-result] This changes the caller to propagate any error codes, which avoids the warning. Signed-off-by: Arnd Bergmann Fixes: 38c9e260b9f9 ("lightnvm: expose device geometry through sysfs") Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/lightnvm.h | 2 +- drivers/lightnvm/sysfs.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/lightnvm/lightnvm.h b/drivers/lightnvm/lightnvm.h index 93f1aacc9f02..305c181509a6 100644 --- a/drivers/lightnvm/lightnvm.h +++ b/drivers/lightnvm/lightnvm.h @@ -24,7 +24,7 @@ #include /* core -> sysfs.c */ -int nvm_sysfs_register_dev(struct nvm_dev *); +int __must_check nvm_sysfs_register_dev(struct nvm_dev *); void nvm_sysfs_unregister_dev(struct nvm_dev *); int nvm_sysfs_register(void); void nvm_sysfs_unregister(void); diff --git a/drivers/lightnvm/sysfs.c b/drivers/lightnvm/sysfs.c index 72ad089c0269..0338c27ab95a 100644 --- a/drivers/lightnvm/sysfs.c +++ b/drivers/lightnvm/sysfs.c @@ -174,6 +174,8 @@ static struct device_type nvm_type = { int nvm_sysfs_register_dev(struct nvm_dev *dev) { + int ret; + if (!dev->parent_dev) return 0; @@ -181,11 +183,12 @@ int nvm_sysfs_register_dev(struct nvm_dev *dev) dev_set_name(&dev->dev, "%s", dev->name); dev->dev.type = &nvm_type; device_initialize(&dev->dev); - device_add(&dev->dev); + ret = device_add(&dev->dev); - blk_mq_register_dev(&dev->dev, dev->q); + if (!ret) + blk_mq_register_dev(&dev->dev, dev->q); - return 0; + return ret; } void nvm_sysfs_unregister_dev(struct nvm_dev *dev) -- cgit From 491221f88d00651e449c9caf7415b6453c8a77b7 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 22 Sep 2016 03:10:01 -0400 Subject: block: export bio_free_pages to other modules bio_free_pages is introduced in commit 1dfa0f68c040 ("block: add a helper to free bio bounce buffer pages"), we can reuse the func in other modules after it was imported. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Mike Snitzer Cc: Shaohua Li Signed-off-by: Guoqing Jiang Acked-by: Kent Overstreet Signed-off-by: Jens Axboe --- block/bio.c | 3 ++- drivers/md/bcache/btree.c | 6 +----- drivers/md/bcache/debug.c | 6 ++---- drivers/md/bcache/movinggc.c | 5 +---- drivers/md/bcache/request.c | 9 ++------- drivers/md/bcache/writeback.c | 5 +---- drivers/md/dm-log-writes.c | 6 +----- drivers/md/raid1.c | 8 ++------ include/linux/bio.h | 1 + 9 files changed, 13 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/block/bio.c b/block/bio.c index a6d279e1ea9e..db85c5753a76 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1068,7 +1068,7 @@ static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) return 0; } -static void bio_free_pages(struct bio *bio) +void bio_free_pages(struct bio *bio) { struct bio_vec *bvec; int i; @@ -1076,6 +1076,7 @@ static void bio_free_pages(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) __free_page(bvec->bv_page); } +EXPORT_SYMBOL(bio_free_pages); /** * bio_uncopy_user - finish previously mapped bio diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 76f7534d1dd1..81d3db40cd7b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -361,12 +361,8 @@ static void __btree_node_write_done(struct closure *cl) static void btree_node_write_done(struct closure *cl) { struct btree *b = container_of(cl, struct btree, io); - struct bio_vec *bv; - int n; - - bio_for_each_segment_all(bv, b->bio, n) - __free_page(bv->bv_page); + bio_free_pages(b->bio); __btree_node_write_done(cl); } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index c28df164701e..333a1e5f6ae6 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -107,9 +107,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) { char name[BDEVNAME_SIZE]; struct bio *check; - struct bio_vec bv, *bv2; + struct bio_vec bv; struct bvec_iter iter; - int i; check = bio_clone(bio, GFP_NOIO); if (!check) @@ -136,8 +135,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) kunmap_atomic(p1); } - bio_for_each_segment_all(bv2, check, i) - __free_page(bv2->bv_page); + bio_free_pages(check); out_put: bio_put(check); } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 1881319f2298..5c4bddecfaf0 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -44,11 +44,8 @@ static void write_moving_finish(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, cl); struct bio *bio = &io->bio.bio; - struct bio_vec *bv; - int i; - bio_for_each_segment_all(bv, bio, i) - __free_page(bv->bv_page); + bio_free_pages(bio); if (io->op.replace_collision) trace_bcache_gc_copy_collision(&io->w->key); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4b177fe11ebb..40ffe5e424b3 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -694,13 +694,8 @@ static void cached_dev_cache_miss_done(struct closure *cl) if (s->iop.replace_collision) bch_mark_cache_miss_collision(s->iop.c, s->d); - if (s->iop.bio) { - int i; - struct bio_vec *bv; - - bio_for_each_segment_all(bv, s->iop.bio, i) - __free_page(bv->bv_page); - } + if (s->iop.bio) + bio_free_pages(s->iop.bio); cached_dev_bio_complete(cl); } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index d9fd2a62e5f6..e51644e503a5 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -128,11 +128,8 @@ static void write_dirty_finish(struct closure *cl) struct dirty_io *io = container_of(cl, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; struct cached_dev *dc = io->dc; - struct bio_vec *bv; - int i; - bio_for_each_segment_all(bv, &io->bio, i) - __free_page(bv->bv_page); + bio_free_pages(&io->bio); /* This is kind of a dumb way of signalling errors. */ if (KEY_DIRTY(&w->key)) { diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 4ab68033f9d1..b52404159ccf 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -149,8 +149,6 @@ static void put_io_block(struct log_writes_c *lc) static void log_end_io(struct bio *bio) { struct log_writes_c *lc = bio->bi_private; - struct bio_vec *bvec; - int i; if (bio->bi_error) { unsigned long flags; @@ -161,9 +159,7 @@ static void log_end_io(struct bio *bio) spin_unlock_irqrestore(&lc->blocks_lock, flags); } - bio_for_each_segment_all(bvec, bio, i) - __free_page(bvec->bv_page); - + bio_free_pages(bio); put_io_block(lc); bio_put(bio); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 21dc00eb1989..1961d827dbd1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -145,12 +145,8 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) return r1_bio; out_free_pages: - while (--j >= 0) { - struct bio_vec *bv; - - bio_for_each_segment_all(bv, r1_bio->bios[j], i) - __free_page(bv->bv_page); - } + while (--j >= 0) + bio_free_pages(r1_bio->bios[j]); out_free_bio: while (++j < pi->raid_disks) diff --git a/include/linux/bio.h b/include/linux/bio.h index e00721a2dce1..97cb48f03dc7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -459,6 +459,7 @@ static inline void bio_flush_dcache_pages(struct bio *bi) extern void bio_copy_data(struct bio *dst, struct bio *src); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); +extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, struct rq_map_data *, -- cgit From 005043ac31ba5bf6721b4ddca10ff2066e2ee2fe Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 21 Sep 2016 16:55:31 -0400 Subject: nbd: use BLK_MQ_F_BLOCKING We take a mutex when sending commands and send stuff over the network, we need to have queue_rq called asynchronously. Signed-off-by: Josef Bacik Fixes: fd8383fd88a2 ("nbd: convert to blkmq") Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4c6dd1a85ead..ccfcfc11399a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -941,7 +941,7 @@ static int __init nbd_init(void) nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE; nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd); nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_SG_MERGE; + BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; nbd_dev[i].tag_set.driver_data = &nbd_dev[i]; err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set); -- cgit From 0fe51ff269da745078e0ab2b90dfdf2a58d6f3e7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 2 Aug 2016 10:40:01 +0300 Subject: nvme-fabrics: rework nvmf_get_address() for variable options Revise nvmf_get_address() string to account for not all options being present. Signed-off-by: James Smart Acked-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fabrics.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index dc996761042f..2867b92cbdfb 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -109,8 +109,16 @@ static void nvmf_host_put(struct nvmf_host *host) */ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size) { - return snprintf(buf, size, "traddr=%s,trsvcid=%s\n", - ctrl->opts->traddr, ctrl->opts->trsvcid); + int len = 0; + + if (ctrl->opts->mask & NVMF_OPT_TRADDR) + len += snprintf(buf, size, "traddr=%s", ctrl->opts->traddr); + if (ctrl->opts->mask & NVMF_OPT_TRSVCID) + len += snprintf(buf + len, size - len, "%strsvcid=%s", + (len) ? "," : "", ctrl->opts->trsvcid); + len += snprintf(buf + len, size - len, "\n"); + + return len; } EXPORT_SYMBOL_GPL(nvmf_get_address); -- cgit From 4a9f05c57f98e794763650056731a0023ebfab5f Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 2 Aug 2016 10:41:20 +0300 Subject: nvme-fabrics: revise host transport option descriptions Revise some of the comments so not so ethernet-network centric Signed-off-by: James Smart Acked-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fabrics.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 89df52c8be97..2755efde16ac 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -64,9 +64,10 @@ enum { * being added. * @subsysnqn: Hold the fully qualified NQN subystem name (format defined * in the NVMe specification, "NVMe Qualified Names"). - * @traddr: network address that will be used by the host to communicate - * to the added NVMe controller. - * @trsvcid: network port used for host-controller communication. + * @traddr: The transport-specific TRADDR field for a port on the + * subsystem which is adding a controller. + * @trsvcid: The transport-specific TRSVCID field for a port on the + * subsystem which is adding a controller. * @queue_size: Number of IO queue elements. * @nr_io_queues: Number of controller IO queues that will be established. * @reconnect_delay: Time between two consecutive reconnect attempts. -- cgit From 478bcb9388f2c3eedba34ed5811793400047f95d Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 2 Aug 2016 10:42:10 +0300 Subject: nvme-fabrics: Add host_traddr options field to host infrastructure Add the host_traddr field to allow specification of the host-port connection info for the transport. Will be used by FC transport. Signed-off-by: James Smart Acked-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fabrics.c | 13 +++++++++++++ drivers/nvme/host/fabrics.h | 4 ++++ 2 files changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 2867b92cbdfb..1c07b76cbdeb 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -116,6 +116,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size) if (ctrl->opts->mask & NVMF_OPT_TRSVCID) len += snprintf(buf + len, size - len, "%strsvcid=%s", (len) ? "," : "", ctrl->opts->trsvcid); + if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) + len += snprintf(buf + len, size - len, "%shost_traddr=%s", + (len) ? "," : "", ctrl->opts->host_traddr); len += snprintf(buf + len, size - len, "\n"); return len; @@ -518,6 +521,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" }, { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, + { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, { NVMF_OPT_ERR, NULL } }; @@ -674,6 +678,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->reconnect_delay = token; break; + case NVMF_OPT_HOST_TRADDR: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + opts->host_traddr = p; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -740,6 +752,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts) kfree(opts->traddr); kfree(opts->trsvcid); kfree(opts->subsysnqn); + kfree(opts->host_traddr); kfree(opts); } EXPORT_SYMBOL_GPL(nvmf_free_options); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 2755efde16ac..8f08c3a3406b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -52,6 +52,7 @@ enum { NVMF_OPT_KATO = 1 << 7, NVMF_OPT_HOSTNQN = 1 << 8, NVMF_OPT_RECONNECT_DELAY = 1 << 9, + NVMF_OPT_HOST_TRADDR = 1 << 10, }; /** @@ -68,6 +69,8 @@ enum { * subsystem which is adding a controller. * @trsvcid: The transport-specific TRSVCID field for a port on the * subsystem which is adding a controller. + * @host_traddr: A transport-specific field identifying the NVME host port + * to use for the connection to the controller. * @queue_size: Number of IO queue elements. * @nr_io_queues: Number of controller IO queues that will be established. * @reconnect_delay: Time between two consecutive reconnect attempts. @@ -81,6 +84,7 @@ struct nvmf_ctrl_options { char *subsysnqn; char *traddr; char *trsvcid; + char *host_traddr; size_t queue_size; unsigned int nr_io_queues; unsigned int reconnect_delay; -- cgit From 2d79c7dc8fe5cf1158250a5fd25c02d781324cd3 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 1 Sep 2016 20:45:03 +0100 Subject: admin-cmd: Added smart-log command support. This patch implements the support for smart-log command (NVM Express 1.2.1-section 5.10.1.2 SMART / Health Information (Log Identifier 02h)) on the target for NVMe over Fabric. In current implementation host can retrieve following statistics:- 1. Data Units Read. 2. Data Units Written. 3. Host Read Commands. 4. Host Write Commands. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/target/admin-cmd.c | 88 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 47c564b5a289..7ab9c9381b98 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include #include "nvmet.h" u32 nvmet_get_log_page_len(struct nvme_command *cmd) @@ -29,8 +30,84 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd) return len; } +static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, + struct nvme_smart_log *slog) +{ + u16 status; + struct nvmet_ns *ns; + u64 host_reads, host_writes, data_units_read, data_units_written; + + status = NVME_SC_SUCCESS; + ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); + if (!ns) { + status = NVME_SC_INVALID_NS; + pr_err("nvmet : Counld not find namespace id : %d\n", + le32_to_cpu(req->cmd->get_log_page.nsid)); + goto out; + } + + host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); + data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]); + host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); + data_units_written = part_stat_read(ns->bdev->bd_part, sectors[WRITE]); + + put_unaligned_le64(host_reads, &slog->host_reads[0]); + put_unaligned_le64(data_units_read, &slog->data_units_read[0]); + put_unaligned_le64(host_writes, &slog->host_writes[0]); + put_unaligned_le64(data_units_written, &slog->data_units_written[0]); + nvmet_put_namespace(ns); +out: + return status; +} + +static u16 nvmet_get_smart_log_all(struct nvmet_req *req, + struct nvme_smart_log *slog) +{ + u16 status; + u64 host_reads = 0, host_writes = 0; + u64 data_units_read = 0, data_units_written = 0; + struct nvmet_ns *ns; + struct nvmet_ctrl *ctrl; + + status = NVME_SC_SUCCESS; + ctrl = req->sq->ctrl; + + rcu_read_lock(); + list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) { + host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); + data_units_read += + part_stat_read(ns->bdev->bd_part, sectors[READ]); + host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]); + data_units_written += + part_stat_read(ns->bdev->bd_part, sectors[WRITE]); + + } + rcu_read_unlock(); + + put_unaligned_le64(host_reads, &slog->host_reads[0]); + put_unaligned_le64(data_units_read, &slog->data_units_read[0]); + put_unaligned_le64(host_writes, &slog->host_writes[0]); + put_unaligned_le64(data_units_written, &slog->data_units_written[0]); + + return status; +} + +static u16 nvmet_get_smart_log(struct nvmet_req *req, + struct nvme_smart_log *slog) +{ + u16 status; + + WARN_ON(req == NULL || slog == NULL); + if (req->cmd->get_log_page.nsid == 0xFFFFFFFF) + status = nvmet_get_smart_log_all(req, slog); + else + status = nvmet_get_smart_log_nsid(req, slog); + return status; +} + static void nvmet_execute_get_log_page(struct nvmet_req *req) { + struct nvme_smart_log *smart_log; size_t data_len = nvmet_get_log_page_len(req->cmd); void *buf; u16 status = 0; @@ -59,6 +136,16 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) * available (e.g. units or commands read/written) those aren't * persistent over power loss. */ + if (data_len != sizeof(*smart_log)) { + status = NVME_SC_INTERNAL; + goto err; + } + smart_log = buf; + status = nvmet_get_smart_log(req, smart_log); + if (status) { + memset(buf, '\0', data_len); + goto err; + } break; case 0x03: /* @@ -73,6 +160,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) status = nvmet_copy_to_sgl(req, 0, buf, data_len); +err: kfree(buf); out: nvmet_req_complete(req, status); -- cgit From 9b349b080ca9777f6dab3da06d7fa4577f7d4c29 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 21 Sep 2016 11:06:32 -0700 Subject: nvmet: Use direct IO for writes We're designed to work with high-end devices where direct IO makes perfect sense. We noticed that we context switch by scheduling kblockd instead of going directly to the device without REQ_SYNC for writes. Signed-off-by: Sagi Grimberg Reviewed-by: Jens Axboe --- drivers/nvme/target/io-cmd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 2cd069b691ae..4132b6b98182 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -58,6 +58,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) if (req->cmd->rw.opcode == nvme_cmd_write) { op = REQ_OP_WRITE; + op_flags = WRITE_ODIRECT; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op_flags |= REQ_FUA; } else { -- cgit From 2e5d0baa04845dc3a3d7dfa33d7663de270bb146 Mon Sep 17 00:00:00 2001 From: Alexander Solganik Date: Wed, 21 Sep 2016 14:12:38 -0700 Subject: nvmet: Make dsm number of ranges zero based This caused the nvmet request data length to be incorrect. Signed-off-by: Alexander Solganik Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/target/io-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 4132b6b98182..4a96c2049b7b 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -206,7 +206,7 @@ int nvmet_parse_io_cmd(struct nvmet_req *req) return 0; case nvme_cmd_dsm: req->execute = nvmet_execute_dsm; - req->data_len = le32_to_cpu(cmd->dsm.nr) * + req->data_len = le32_to_cpu(cmd->dsm.nr + 1) * sizeof(struct nvme_dsm_range); return 0; default: -- cgit From 26501db8dcbc3c63c0d8fb6c5bb098bc7d35d741 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 16 Sep 2016 11:16:09 -0700 Subject: nvme/scsi: Remove power management support As far as I can tell, there is basically nothing correct about this code. It misinterprets npss (off-by-one). It hardcodes a bunch of power states, which is nonsense, because they're all just indices into a table that software needs to parse. It completely ignores the distinction between operational and non-operational states. And, until 4.8, if all of the above magically succeeded, it would dereference a NULL pointer and OOPS. Since this code appears to be useless, just delete it. Signed-off-by: Andy Lutomirski Reviewed-by: Christoph Hellwig Acked-by: Jay Freyensee Tested-by: Jay Freyensee Signed-off-by: Jens Axboe --- drivers/nvme/host/scsi.c | 74 ++---------------------------------------------- 1 file changed, 3 insertions(+), 71 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index e947e298a737..44009105f8c8 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -72,15 +72,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define ALL_LUNS_RETURNED 0x02 #define ALL_WELL_KNOWN_LUNS_RETURNED 0x01 #define RESTRICTED_LUNS_RETURNED 0x00 -#define NVME_POWER_STATE_START_VALID 0x00 -#define NVME_POWER_STATE_ACTIVE 0x01 -#define NVME_POWER_STATE_IDLE 0x02 -#define NVME_POWER_STATE_STANDBY 0x03 -#define NVME_POWER_STATE_LU_CONTROL 0x07 -#define POWER_STATE_0 0 -#define POWER_STATE_1 1 -#define POWER_STATE_2 2 -#define POWER_STATE_3 3 #define DOWNLOAD_SAVE_ACTIVATE 0x05 #define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E #define ACTIVATE_DEFERRED_MICROCODE 0x0F @@ -1229,64 +1220,6 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns, /* Start Stop Unit Helper Functions */ -static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 pc, u8 pcmod, u8 start) -{ - int res; - int nvme_sc; - struct nvme_id_ctrl *id_ctrl; - int lowest_pow_st; /* max npss = lowest power consumption */ - unsigned ps_desired = 0; - - nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl); - res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - return res; - - lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); - kfree(id_ctrl); - - switch (pc) { - case NVME_POWER_STATE_START_VALID: - /* Action unspecified if POWER CONDITION MODIFIER != 0 */ - if (pcmod == 0 && start == 0x1) - ps_desired = POWER_STATE_0; - if (pcmod == 0 && start == 0x0) - ps_desired = lowest_pow_st; - break; - case NVME_POWER_STATE_ACTIVE: - /* Action unspecified if POWER CONDITION MODIFIER != 0 */ - if (pcmod == 0) - ps_desired = POWER_STATE_0; - break; - case NVME_POWER_STATE_IDLE: - /* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */ - if (pcmod == 0x0) - ps_desired = POWER_STATE_1; - else if (pcmod == 0x1) - ps_desired = POWER_STATE_2; - else if (pcmod == 0x2) - ps_desired = POWER_STATE_3; - break; - case NVME_POWER_STATE_STANDBY: - /* Action unspecified if POWER CONDITION MODIFIER != [0,1] */ - if (pcmod == 0x0) - ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2)); - else if (pcmod == 0x1) - ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1)); - break; - case NVME_POWER_STATE_LU_CONTROL: - default: - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - break; - } - nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0, - NULL); - return nvme_trans_status_code(hdr, nvme_sc); -} - static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 buffer_id) { @@ -2235,11 +2168,10 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - u8 immed, pcmod, pc, no_flush, start; + u8 immed, pcmod, no_flush, start; immed = cmd[1] & 0x01; pcmod = cmd[3] & 0x0f; - pc = (cmd[4] & 0xf0) >> 4; no_flush = cmd[4] & 0x04; start = cmd[4] & 0x01; @@ -2254,8 +2186,8 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, if (res) return res; } - /* Setup the expected power state transition */ - return nvme_trans_power_state(ns, hdr, pc, pcmod, start); + + return 0; } } -- cgit From 1a6fe74dfd1bb10afb41cbbbdc14890604be42a6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 16 Sep 2016 11:16:10 -0700 Subject: nvme: Pass pointers, not dma addresses, to nvme_get/set_features() Any user I can imagine that needs a buffer at all will want to pass a pointer directly. There are no currently callers that use buffers, so this change is painless, and it will make it much easier to start using features that use buffers (e.g. APST). Signed-off-by: Andy Lutomirski Reviewed-by: Christoph Hellwig Acked-by: Jay Freyensee Tested-by: Jay Freyensee Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 14 ++++++-------- drivers/nvme/host/nvme.h | 4 ++-- drivers/nvme/host/scsi.c | 6 +++--- 3 files changed, 11 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bd2156cbfc6c..4669c052239e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -599,7 +599,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, } int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result) + void *buffer, size_t buflen, u32 *result) { struct nvme_command c; struct nvme_completion cqe; @@ -608,10 +608,9 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; c.features.nsid = cpu_to_le32(nsid); - c.features.dptr.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); @@ -619,7 +618,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, } int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result) + void *buffer, size_t buflen, u32 *result) { struct nvme_command c; struct nvme_completion cqe; @@ -627,12 +626,11 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; - c.features.dptr.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, + buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); return ret; @@ -666,7 +664,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) u32 result; int status, nr_io_queues; - status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0, + status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0, &result); if (status < 0) return status; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bfd25dd73bca..b0a9ec681685 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -293,9 +293,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, struct nvme_id_ns **id); int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log); int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result); + void *buffer, size_t buflen, u32 *result); int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result); + void *buffer, size_t buflen, u32 *result); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index 44009105f8c8..c2a0a1c7d05d 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -906,7 +906,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, kfree(smart_log); /* Get Features for Temp Threshold */ - res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0, + res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0, &feature_resp); if (res != NVME_SC_SUCCESS) temp_c_thresh = LOG_TEMP_UNKNOWN; @@ -1039,7 +1039,7 @@ static int nvme_trans_fill_caching_page(struct nvme_ns *ns, if (len < MODE_PAGE_CACHING_LEN) return -EINVAL; - nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0, + nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0, &feature_resp); res = nvme_trans_status_code(hdr, nvme_sc); if (res) @@ -1328,7 +1328,7 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, case MODE_PAGE_CACHING: dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0); nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, - dword11, 0, NULL); + dword11, NULL, 0, NULL); res = nvme_trans_status_code(hdr, nvme_sc); break; case MODE_PAGE_CONTROL: -- cgit