diff options
Diffstat (limited to 'drivers/block')
37 files changed, 944 insertions, 1205 deletions
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 9e2d0c6a3877..8b1714021498 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1781,15 +1781,13 @@ static int fd_alloc_disk(int drive, int system) { struct gendisk *disk; - disk = alloc_disk(1); - if (!disk) - goto out; - disk->queue = blk_mq_init_queue(&unit[drive].tag_set); - if (IS_ERR(disk->queue)) - goto out_put_disk; + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + if (IS_ERR(disk)) + return PTR_ERR(disk); disk->major = FLOPPY_MAJOR; disk->first_minor = drive + system; + disk->minors = 1; disk->fops = &floppy_fops; disk->events = DISK_EVENT_MEDIA_CHANGE; if (system) @@ -1802,12 +1800,6 @@ static int fd_alloc_disk(int drive, int system) unit[drive].gendisk[system] = disk; add_disk(disk); return 0; - -out_put_disk: - disk->queue = NULL; - put_disk(disk); -out: - return -ENOMEM; } static int fd_alloc_drive(int drive) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index c34e71b0c4a9..06b360f7123a 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -338,14 +338,13 @@ static const struct blk_mq_ops aoeblk_mq_ops = { .queue_rq = aoeblk_queue_rq, }; -/* alloc_disk and add_disk can sleep */ +/* blk_mq_alloc_disk and add_disk can sleep */ void aoeblk_gdalloc(void *vp) { struct aoedev *d = vp; struct gendisk *gd; mempool_t *mp; - struct request_queue *q; struct blk_mq_tag_set *set; ulong flags; int late = 0; @@ -362,19 +361,12 @@ aoeblk_gdalloc(void *vp) if (late) return; - gd = alloc_disk(AOE_PARTITIONS); - if (gd == NULL) { - pr_err("aoe: cannot allocate disk structure for %ld.%d\n", - d->aoemajor, d->aoeminor); - goto err; - } - mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab, buf_pool_cache); if (mp == NULL) { printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n", d->aoemajor, d->aoeminor); - goto err_disk; + goto err; } set = &d->tag_set; @@ -391,12 +383,11 @@ aoeblk_gdalloc(void *vp) goto err_mempool; } - q = blk_mq_init_queue(set); - if (IS_ERR(q)) { + gd = blk_mq_alloc_disk(set, d); + if (IS_ERR(gd)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); - blk_mq_free_tag_set(set); - goto err_mempool; + goto err_tagset; } spin_lock_irqsave(&d->lock, flags); @@ -405,16 +396,16 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->flags & DEVFL_TKILL); WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); - blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); - blk_queue_io_opt(q, SZ_2M); + blk_queue_max_hw_sectors(gd->queue, BLK_DEF_MAX_SECTORS); + blk_queue_io_opt(gd->queue, SZ_2M); d->bufpool = mp; - d->blkq = gd->queue = q; - q->queuedata = d; + d->blkq = gd->queue; d->gd = gd; if (aoe_maxsectors) - blk_queue_max_hw_sectors(q, aoe_maxsectors); + blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors); gd->major = AOE_MAJOR; gd->first_minor = d->sysminor; + gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; set_capacity(gd, d->ssize); @@ -435,10 +426,10 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); return; +err_tagset: + blk_mq_free_tag_set(set); err_mempool: mempool_destroy(mp); -err_disk: - put_disk(gd); err: spin_lock_irqsave(&d->lock, flags); d->flags &= ~DEVFL_GD_NOW; diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index ab41be625a53..8eea2529da20 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -140,10 +140,8 @@ bail: spin_unlock_irqrestore(&emsgs_lock, flags); } mp = kmemdup(msg, n, GFP_ATOMIC); - if (mp == NULL) { - printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n); + if (!mp) goto bail; - } em->msg = mp; em->flags |= EMFL_VALID; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index ecd77897a761..588889bea7c3 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1701,8 +1701,6 @@ aoecmd_init(void) goto ktiowq_fail; } - mutex_init(&ktio_spawn_lock); - for (i = 0; i < ncpus; i++) { INIT_LIST_HEAD(&iocq[i].head); spin_lock_init(&iocq[i].lock); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index e2ea2356da06..c5753c6bfe80 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -277,9 +277,8 @@ freedev(struct aoedev *d) if (d->gd) { aoedisk_rm_debugfs(d); del_gendisk(d->gd); - put_disk(d->gd); + blk_cleanup_disk(d->gd); blk_mq_free_tag_set(&d->tag_set); - blk_cleanup_queue(d->blkq); } t = d->targets; e = t + d->ntargets; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index d601e49f80e0..a093644ac39f 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1968,22 +1968,14 @@ static const struct blk_mq_ops ataflop_mq_ops = { static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { struct gendisk *disk; - int ret; - - disk = alloc_disk(1); - if (!disk) - return -ENOMEM; - disk->queue = blk_mq_init_queue(&unit[drive].tag_set); - if (IS_ERR(disk->queue)) { - ret = PTR_ERR(disk->queue); - disk->queue = NULL; - put_disk(disk); - return ret; - } + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + if (IS_ERR(disk)) + return PTR_ERR(disk); disk->major = FLOPPY_MAJOR; disk->first_minor = drive + (type << 2); + disk->minors = 1; sprintf(disk->disk_name, "fd%d", drive); disk->fops = &floppy_fops; disk->events = DISK_EVENT_MEDIA_CHANGE; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 7562cf30b14e..95694113e38e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -38,9 +38,7 @@ * device). */ struct brd_device { - int brd_number; - - struct request_queue *brd_queue; + int brd_number; struct gendisk *brd_disk; struct list_head brd_list; @@ -372,7 +370,7 @@ static LIST_HEAD(brd_devices); static DEFINE_MUTEX(brd_devices_mutex); static struct dentry *brd_debugfs_dir; -static struct brd_device *brd_alloc(int i) +static int brd_alloc(int i) { struct brd_device *brd; struct gendisk *disk; @@ -380,64 +378,55 @@ static struct brd_device *brd_alloc(int i) brd = kzalloc(sizeof(*brd), GFP_KERNEL); if (!brd) - goto out; + return -ENOMEM; brd->brd_number = i; spin_lock_init(&brd->brd_lock); INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); - brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE); - if (!brd->brd_queue) - goto out_free_dev; - snprintf(buf, DISK_NAME_LEN, "ram%d", i); if (!IS_ERR_OR_NULL(brd_debugfs_dir)) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); - /* This is so fdisk will align partitions on 4k, because of - * direct_access API needing 4k alignment, returning a PFN - * (This is only a problem on very small devices <= 4M, - * otherwise fdisk will align on 1M. Regardless this call - * is harmless) - */ - blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE); - disk = brd->brd_disk = alloc_disk(max_part); + disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) - goto out_free_queue; + goto out_free_dev; + disk->major = RAMDISK_MAJOR; disk->first_minor = i * max_part; + disk->minors = max_part; disk->fops = &brd_fops; disk->private_data = brd; disk->flags = GENHD_FL_EXT_DEVT; strlcpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); + + /* + * This is so fdisk will align partitions on 4k, because of + * direct_access API needing 4k alignment, returning a PFN + * (This is only a problem on very small devices <= 4M, + * otherwise fdisk will align on 1M. Regardless this call + * is harmless) + */ + blk_queue_physical_block_size(disk->queue, PAGE_SIZE); /* Tell the block layer that this is not a rotational device */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); + add_disk(disk); + list_add_tail(&brd->brd_list, &brd_devices); - return brd; + return 0; -out_free_queue: - blk_cleanup_queue(brd->brd_queue); out_free_dev: kfree(brd); -out: - return NULL; -} - -static void brd_free(struct brd_device *brd) -{ - put_disk(brd->brd_disk); - blk_cleanup_queue(brd->brd_queue); - brd_free_pages(brd); - kfree(brd); + return -ENOMEM; } static void brd_probe(dev_t dev) { - struct brd_device *brd; int i = MINOR(dev) / max_part; + struct brd_device *brd; mutex_lock(&brd_devices_mutex); list_for_each_entry(brd, &brd_devices, brd_list) { @@ -445,13 +434,7 @@ static void brd_probe(dev_t dev) goto out_unlock; } - brd = brd_alloc(i); - if (brd) { - brd->brd_disk->queue = brd->brd_queue; - add_disk(brd->brd_disk); - list_add_tail(&brd->brd_list, &brd_devices); - } - + brd_alloc(i); out_unlock: mutex_unlock(&brd_devices_mutex); } @@ -460,7 +443,9 @@ static void brd_del_one(struct brd_device *brd) { list_del(&brd->brd_list); del_gendisk(brd->brd_disk); - brd_free(brd); + blk_cleanup_disk(brd->brd_disk); + brd_free_pages(brd); + kfree(brd); } static inline void brd_check_and_reset_par(void) @@ -485,7 +470,7 @@ static inline void brd_check_and_reset_par(void) static int __init brd_init(void) { struct brd_device *brd, *next; - int i; + int err, i; /* * brd module now has a feature to instantiate underlying device @@ -511,22 +496,11 @@ static int __init brd_init(void) mutex_lock(&brd_devices_mutex); for (i = 0; i < rd_nr; i++) { - brd = brd_alloc(i); - if (!brd) + err = brd_alloc(i); + if (err) goto out_free; - list_add_tail(&brd->brd_list, &brd_devices); } - /* point of no return */ - - list_for_each_entry(brd, &brd_devices, brd_list) { - /* - * associate with queue just before adding disk for - * avoiding to mess up failure path - */ - brd->brd_disk->queue = brd->brd_queue; - add_disk(brd->brd_disk); - } mutex_unlock(&brd_devices_mutex); pr_info("brd: module loaded\n"); @@ -535,15 +509,13 @@ static int __init brd_init(void) out_free: debugfs_remove_recursive(brd_debugfs_dir); - list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { - list_del(&brd->brd_list); - brd_free(brd); - } + list_for_each_entry_safe(brd, next, &brd_devices, brd_list) + brd_del_one(brd); mutex_unlock(&brd_devices_mutex); unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); pr_info("brd: module NOT loaded !!!\n"); - return -ENOMEM; + return err; } static void __exit brd_exit(void) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index de463773b530..55234a558e98 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2231,8 +2231,7 @@ void drbd_destroy_device(struct kref *kref) if (device->bitmap) /* should no longer be there. */ drbd_bm_cleanup(device); __free_page(device->md_io.page); - put_disk(device->vdisk); - blk_cleanup_queue(device->rq_queue); + blk_cleanup_disk(device->vdisk); kfree(device->rs_plan_s); /* not for_each_connection(connection, resource): @@ -2701,7 +2700,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig struct drbd_device *device; struct drbd_peer_device *peer_device, *tmp_peer_device; struct gendisk *disk; - struct request_queue *q; int id; int vnr = adm_ctx->volume; enum drbd_ret_code err = ERR_NOMEM; @@ -2723,29 +2721,26 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue(NUMA_NO_NODE); - if (!q) - goto out_no_q; - device->rq_queue = q; - - disk = alloc_disk(1); + disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) goto out_no_disk; + device->vdisk = disk; + device->rq_queue = disk->queue; set_disk_ro(disk, true); - disk->queue = q; disk->major = DRBD_MAJOR; disk->first_minor = minor; + disk->minors = 1; disk->fops = &drbd_ops; sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; - blk_queue_write_cache(q, true, true); + blk_queue_write_cache(disk->queue, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ - blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); + blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8); device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) @@ -2834,10 +2829,8 @@ out_no_minor_idr: out_no_bitmap: __free_page(device->md_io.page); out_no_io_page: - put_disk(disk); + blk_cleanup_disk(disk); out_no_disk: - blk_cleanup_queue(q); -out_no_q: kref_put(&resource->kref, drbd_destroy_resource); kfree(device); return err; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 69284ebba786..1f740e42e457 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3770,10 +3770,8 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in } new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); - if (!new_net_conf) { - drbd_err(connection, "Allocation of new net_conf failed\n"); + if (!new_net_conf) goto disconnect; - } mutex_lock(&connection->data.mutex); mutex_lock(&connection->resource->conf_update); @@ -4020,10 +4018,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i if (verify_tfm || csums_tfm) { new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); - if (!new_net_conf) { - drbd_err(device, "Allocation of new net_conf failed\n"); + if (!new_net_conf) goto disconnect; - } *new_net_conf = *old_net_conf; @@ -4161,7 +4157,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); if (!new_disk_conf) { - drbd_err(device, "Allocation of new disk_conf failed\n"); put_ldev(device); return -ENOMEM; } @@ -4288,10 +4283,8 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info device = peer_device->device; p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO); - if (!p_uuid) { - drbd_err(device, "kmalloc of p_uuid failed\n"); + if (!p_uuid) return false; - } for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) p_uuid[i] = be64_to_cpu(p->uuid[i]); @@ -5484,8 +5477,7 @@ static int drbd_do_auth(struct drbd_connection *connection) } peers_ch = kmalloc(pi.size, GFP_NOIO); - if (peers_ch == NULL) { - drbd_err(connection, "kmalloc of peers_ch failed\n"); + if (!peers_ch) { rv = -1; goto fail; } @@ -5504,8 +5496,7 @@ static int drbd_do_auth(struct drbd_connection *connection) resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm); response = kmalloc(resp_size, GFP_NOIO); - if (response == NULL) { - drbd_err(connection, "kmalloc of response failed\n"); + if (!response) { rv = -1; goto fail; } @@ -5552,8 +5543,7 @@ static int drbd_do_auth(struct drbd_connection *connection) } right_response = kmalloc(resp_size, GFP_NOIO); - if (right_response == NULL) { - drbd_err(connection, "kmalloc of right_response failed\n"); + if (!right_response) { rv = -1; goto fail; } diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8a9d22207c59..87460e0e5c72 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2123,6 +2123,7 @@ static void format_interrupt(void) switch (interpret_errors()) { case 1: cont->error(); + break; case 2: break; case 0: @@ -2330,7 +2331,6 @@ static void rw_interrupt(void) if (!drive_state[current_drive].first_read_date) drive_state[current_drive].first_read_date = jiffies; - nr_sectors = 0; ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4); if (reply_buffer[ST1] & ST1_EOC) @@ -4491,23 +4491,15 @@ static bool floppy_available(int drive) static int floppy_alloc_disk(unsigned int drive, unsigned int type) { struct gendisk *disk; - int err; - - disk = alloc_disk(1); - if (!disk) - return -ENOMEM; - disk->queue = blk_mq_init_queue(&tag_sets[drive]); - if (IS_ERR(disk->queue)) { - err = PTR_ERR(disk->queue); - disk->queue = NULL; - put_disk(disk); - return err; - } + disk = blk_mq_alloc_disk(&tag_sets[drive], NULL); + if (IS_ERR(disk)) + return PTR_ERR(disk); blk_queue_max_hw_sectors(disk->queue, 64); disk->major = FLOPPY_MAJOR; disk->first_minor = TOMINOR(drive) | (type << 2); + disk->minors = 1; disk->fops = &floppy_fops; disk->events = DISK_EVENT_MEDIA_CHANGE; if (type) @@ -4727,10 +4719,8 @@ out_put_disk: if (!disks[drive][0]) break; del_timer_sync(&motor_off_timer[drive]); - blk_cleanup_queue(disks[drive][0]->queue); - disks[drive][0]->queue = NULL; + blk_cleanup_disk(disks[drive][0]); blk_mq_free_tag_set(&tag_sets[drive]); - put_disk(disks[drive][0]); } return err; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d58d68f3c7cd..f0cdff0c5fbf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -71,7 +71,6 @@ #include <linux/writeback.h> #include <linux/completion.h> #include <linux/highmem.h> -#include <linux/kthread.h> #include <linux/splice.h> #include <linux/sysfs.h> #include <linux/miscdevice.h> @@ -79,13 +78,57 @@ #include <linux/uio.h> #include <linux/ioprio.h> #include <linux/blk-cgroup.h> +#include <linux/sched/mm.h> #include "loop.h" #include <linux/uaccess.h> +#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) + static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. + */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -515,8 +558,6 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); - if (cmd->css) - css_put(cmd->css); cmd->ret = ret; lo_rw_aio_do_completion(cmd); } @@ -577,8 +618,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_complete = lo_rw_aio_complete; cmd->iocb.ki_flags = IOCB_DIRECT; cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); - if (cmd->css) - kthread_associate_blkcg(cmd->css); if (rw == WRITE) ret = call_write_iter(file, &cmd->iocb, &iter); @@ -586,7 +625,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, ret = call_read_iter(file, &cmd->iocb, &iter); lo_rw_aio_do_completion(cmd); - kthread_associate_blkcg(NULL); if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); @@ -647,14 +685,13 @@ static inline void loop_update_dio(struct loop_device *lo) lo->use_dio); } -static void loop_reread_partitions(struct loop_device *lo, - struct block_device *bdev) +static void loop_reread_partitions(struct loop_device *lo) { int rc; - mutex_lock(&bdev->bd_mutex); - rc = bdev_disk_changed(bdev, false); - mutex_unlock(&bdev->bd_mutex); + mutex_lock(&lo->lo_disk->open_mutex); + rc = bdev_disk_changed(lo->lo_disk, false); + mutex_unlock(&lo->lo_disk->open_mutex); if (rc) pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", __func__, lo->lo_number, lo->lo_file_name, rc); @@ -676,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). */ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -701,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -717,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -744,21 +783,30 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. + */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping - * the file ref can take bd_mutex which creates circular locking + * the file ref can take open_mutex which creates circular locking * dependency. */ fput(old_file); if (partscan) - loop_reread_partitions(lo, bdev); + loop_reread_partitions(lo); return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -921,27 +969,100 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_alignment = 0; } -static void loop_unprepare_queue(struct loop_device *lo) +struct loop_worker { + struct rb_node rb_node; + struct work_struct work; + struct list_head cmd_list; + struct list_head idle_list; + struct loop_device *lo; + struct cgroup_subsys_state *blkcg_css; + unsigned long last_ran_at; +}; + +static void loop_workfn(struct work_struct *work); +static void loop_rootcg_workfn(struct work_struct *work); +static void loop_free_idle_workers(struct timer_list *timer); + +#ifdef CONFIG_BLK_CGROUP +static inline int queue_on_root_worker(struct cgroup_subsys_state *css) { - kthread_flush_worker(&lo->worker); - kthread_stop(lo->worker_task); + return !css || css == blkcg_root_css; } - -static int loop_kthread_worker_fn(void *worker_ptr) +#else +static inline int queue_on_root_worker(struct cgroup_subsys_state *css) { - current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; - return kthread_worker_fn(worker_ptr); + return !css; } +#endif -static int loop_prepare_queue(struct loop_device *lo) +static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) { - kthread_init_worker(&lo->worker); - lo->worker_task = kthread_run(loop_kthread_worker_fn, - &lo->worker, "loop%d", lo->lo_number); - if (IS_ERR(lo->worker_task)) - return -ENOMEM; - set_user_nice(lo->worker_task, MIN_NICE); - return 0; + struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL; + struct loop_worker *cur_worker, *worker = NULL; + struct work_struct *work; + struct list_head *cmd_list; + + spin_lock_irq(&lo->lo_work_lock); + + if (queue_on_root_worker(cmd->blkcg_css)) + goto queue_work; + + node = &lo->worker_tree.rb_node; + + while (*node) { + parent = *node; + cur_worker = container_of(*node, struct loop_worker, rb_node); + if (cur_worker->blkcg_css == cmd->blkcg_css) { + worker = cur_worker; + break; + } else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) { + node = &(*node)->rb_left; + } else { + node = &(*node)->rb_right; + } + } + if (worker) + goto queue_work; + + worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN); + /* + * In the event we cannot allocate a worker, just queue on the + * rootcg worker and issue the I/O as the rootcg + */ + if (!worker) { + cmd->blkcg_css = NULL; + if (cmd->memcg_css) + css_put(cmd->memcg_css); + cmd->memcg_css = NULL; + goto queue_work; + } + + worker->blkcg_css = cmd->blkcg_css; + css_get(worker->blkcg_css); + INIT_WORK(&worker->work, loop_workfn); + INIT_LIST_HEAD(&worker->cmd_list); + INIT_LIST_HEAD(&worker->idle_list); + worker->lo = lo; + rb_link_node(&worker->rb_node, parent, node); + rb_insert_color(&worker->rb_node, &lo->worker_tree); +queue_work: + if (worker) { + /* + * We need to remove from the idle list here while + * holding the lock so that the idle timer doesn't + * free the worker + */ + if (!list_empty(&worker->idle_list)) + list_del_init(&worker->idle_list); + work = &worker->work; + cmd_list = &worker->cmd_list; + } else { + work = &lo->rootcg_work; + cmd_list = &lo->rootcg_cmd_list; + } + list_add_tail(&cmd->list_entry, cmd_list); + queue_work(lo->workqueue, work); + spin_unlock_irq(&lo->lo_work_lock); } static void loop_update_rotational(struct loop_device *lo) @@ -1067,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1093,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1127,12 +1248,23 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, !file->f_op->write_iter) lo->lo_flags |= LO_FLAGS_READ_ONLY; - error = loop_prepare_queue(lo); - if (error) + lo->workqueue = alloc_workqueue("loop%d", + WQ_UNBOUND | WQ_FREEZABLE, + 0, + lo->lo_number); + if (!lo->workqueue) { + error = -ENOMEM; goto out_unlock; + } set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); + INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn); + INIT_LIST_HEAD(&lo->rootcg_cmd_list); + INIT_LIST_HEAD(&lo->idle_worker_list); + lo->worker_tree = RB_ROOT; + timer_setup(&lo->timer, loop_free_idle_workers, + TIMER_DEFERRABLE); lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; lo->lo_backing_file = file; @@ -1154,6 +1286,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, blk_queue_physical_block_size(lo->lo_queue, bsize); blk_queue_io_min(lo->lo_queue, bsize); + loop_config_discard(lo); loop_update_rotational(lo); loop_update_dio(lo); loop_sysfs_init(lo); @@ -1161,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1172,21 +1308,20 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) - loop_reread_partitions(lo, bdev); + loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1200,6 +1335,19 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int err = 0; bool partscan = false; int lo_number; + struct loop_worker *pos, *worker; + + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { @@ -1219,6 +1367,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) /* freeze request queue during the transition */ blk_mq_freeze_queue(lo->lo_queue); + destroy_workqueue(lo->workqueue); + spin_lock_irq(&lo->lo_work_lock); + list_for_each_entry_safe(worker, pos, &lo->idle_worker_list, + idle_list) { + list_del(&worker->idle_list); + rb_erase(&worker->rb_node, &lo->worker_tree); + css_put(worker->blkcg_css); + kfree(worker); + } + spin_unlock_irq(&lo->lo_work_lock); + del_timer_sync(&lo->timer); + spin_lock_irq(&lo->lo_lock); lo->lo_backing_file = NULL; spin_unlock_irq(&lo->lo_lock); @@ -1255,12 +1415,11 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev; lo_number = lo->lo_number; - loop_unprepare_queue(lo); out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) { /* - * bd_mutex has been held already in release path, so don't + * open_mutex has been held already in release path, so don't * acquire it if this function is called in such case. * * If the reread partition isn't from release path, lo_refcnt @@ -1268,10 +1427,10 @@ out_unlock: * current holder is released. */ if (!release) - mutex_lock(&bdev->bd_mutex); - err = bdev_disk_changed(bdev, false); + mutex_lock(&lo->lo_disk->open_mutex); + err = bdev_disk_changed(lo->lo_disk, false); if (!release) - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&lo->lo_disk->open_mutex); if (err) pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", __func__, lo_number, err); @@ -1298,7 +1457,7 @@ out_unlock: /* * Need not hold lo_mutex to fput backing file. Calling fput holding * lo_mutex triggers a circular lock dependency possibility warning as - * fput can take bd_mutex which is usually taken before lo_mutex. + * fput can take open_mutex which is usually taken before lo_mutex. */ if (filp) fput(filp); @@ -1341,7 +1500,6 @@ static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { int err; - struct block_device *bdev; kuid_t uid = current_uid(); int prev_lo_flags; bool partscan = false; @@ -1410,13 +1568,12 @@ out_unfreeze: if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - bdev = lo->lo_device; partscan = true; } out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) - loop_reread_partitions(lo, bdev); + loop_reread_partitions(lo); return err; } @@ -1879,29 +2036,18 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { - struct loop_device *lo; + struct loop_device *lo = bdev->bd_disk->private_data; int err; - /* - * take loop_ctl_mutex to protect lo pointer from race with - * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention - * release it prior to updating lo->lo_refcnt. - */ - err = mutex_lock_killable(&loop_ctl_mutex); - if (err) - return err; - lo = bdev->bd_disk->private_data; - if (!lo) { - mutex_unlock(&loop_ctl_mutex); - return -ENXIO; - } err = mutex_lock_killable(&lo->lo_mutex); - mutex_unlock(&loop_ctl_mutex); if (err) return err; - atomic_inc(&lo->lo_refcnt); + if (lo->lo_state == Lo_deleting) + err = -ENXIO; + else + atomic_inc(&lo->lo_refcnt); mutex_unlock(&lo->lo_mutex); - return 0; + return err; } static void lo_release(struct gendisk *disk, fmode_t mode) @@ -2019,14 +2165,19 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, } /* always use the first bio's css */ + cmd->blkcg_css = NULL; + cmd->memcg_css = NULL; #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { - cmd->css = &bio_blkcg(rq->bio)->css; - css_get(cmd->css); - } else + if (rq->bio && rq->bio->bi_blkg) { + cmd->blkcg_css = &bio_blkcg(rq->bio)->css; +#ifdef CONFIG_MEMCG + cmd->memcg_css = + cgroup_get_e_css(cmd->blkcg_css->cgroup, + &memory_cgrp_subsys); +#endif + } #endif - cmd->css = NULL; - kthread_queue_work(&lo->worker, &cmd->work); + loop_queue_work(lo, cmd); return BLK_STS_OK; } @@ -2037,13 +2188,28 @@ static void loop_handle_cmd(struct loop_cmd *cmd) const bool write = op_is_write(req_op(rq)); struct loop_device *lo = rq->q->queuedata; int ret = 0; + struct mem_cgroup *old_memcg = NULL; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { ret = -EIO; goto failed; } + if (cmd->blkcg_css) + kthread_associate_blkcg(cmd->blkcg_css); + if (cmd->memcg_css) + old_memcg = set_active_memcg( + mem_cgroup_from_css(cmd->memcg_css)); + ret = do_req_filebacked(lo, rq); + + if (cmd->blkcg_css) + kthread_associate_blkcg(NULL); + + if (cmd->memcg_css) { + set_active_memcg(old_memcg); + css_put(cmd->memcg_css); + } failed: /* complete non-aio request */ if (!cmd->use_aio || ret) { @@ -2056,30 +2222,86 @@ static void loop_handle_cmd(struct loop_cmd *cmd) } } -static void loop_queue_work(struct kthread_work *work) +static void loop_set_timer(struct loop_device *lo) +{ + timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT); +} + +static void loop_process_work(struct loop_worker *worker, + struct list_head *cmd_list, struct loop_device *lo) +{ + int orig_flags = current->flags; + struct loop_cmd *cmd; + + current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; + spin_lock_irq(&lo->lo_work_lock); + while (!list_empty(cmd_list)) { + cmd = container_of( + cmd_list->next, struct loop_cmd, list_entry); + list_del(cmd_list->next); + spin_unlock_irq(&lo->lo_work_lock); + + loop_handle_cmd(cmd); + cond_resched(); + + spin_lock_irq(&lo->lo_work_lock); + } + + /* + * We only add to the idle list if there are no pending cmds + * *and* the worker will not run again which ensures that it + * is safe to free any worker on the idle list + */ + if (worker && !work_pending(&worker->work)) { + worker->last_ran_at = jiffies; + list_add_tail(&worker->idle_list, &lo->idle_worker_list); + loop_set_timer(lo); + } + spin_unlock_irq(&lo->lo_work_lock); + current->flags = orig_flags; +} + +static void loop_workfn(struct work_struct *work) { - struct loop_cmd *cmd = - container_of(work, struct loop_cmd, work); + struct loop_worker *worker = + container_of(work, struct loop_worker, work); + loop_process_work(worker, &worker->cmd_list, worker->lo); +} - loop_handle_cmd(cmd); +static void loop_rootcg_workfn(struct work_struct *work) +{ + struct loop_device *lo = + container_of(work, struct loop_device, rootcg_work); + loop_process_work(NULL, &lo->rootcg_cmd_list, lo); } -static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) +static void loop_free_idle_workers(struct timer_list *timer) { - struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct loop_device *lo = container_of(timer, struct loop_device, timer); + struct loop_worker *pos, *worker; - kthread_init_work(&cmd->work, loop_queue_work); - return 0; + spin_lock_irq(&lo->lo_work_lock); + list_for_each_entry_safe(worker, pos, &lo->idle_worker_list, + idle_list) { + if (time_is_after_jiffies(worker->last_ran_at + + LOOP_IDLE_WORKER_TIMEOUT)) + break; + list_del(&worker->idle_list); + rb_erase(&worker->rb_node, &lo->worker_tree); + css_put(worker->blkcg_css); + kfree(worker); + } + if (!list_empty(&lo->idle_worker_list)) + loop_set_timer(lo); + spin_unlock_irq(&lo->lo_work_lock); } static const struct blk_mq_ops loop_mq_ops = { .queue_rq = loop_queue_rq, - .init_request = loop_init_request, .complete = lo_complete_rq, }; -static int loop_add(struct loop_device **l, int i) +static int loop_add(int i) { struct loop_device *lo; struct gendisk *disk; @@ -2089,9 +2311,12 @@ static int loop_add(struct loop_device **l, int i) lo = kzalloc(sizeof(*lo), GFP_KERNEL); if (!lo) goto out; - lo->lo_state = Lo_unbound; + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + goto out_free_dev; + /* allocate id, if @id >= 0, we're requesting that specific id */ if (i >= 0) { err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); @@ -2101,7 +2326,7 @@ static int loop_add(struct loop_device **l, int i) err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); } if (err < 0) - goto out_free_dev; + goto out_unlock; i = err; err = -ENOMEM; @@ -2117,12 +2342,12 @@ static int loop_add(struct loop_device **l, int i) if (err) goto out_free_idr; - lo->lo_queue = blk_mq_init_queue(&lo->tag_set); - if (IS_ERR(lo->lo_queue)) { - err = PTR_ERR(lo->lo_queue); + disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_cleanup_tags; } - lo->lo_queue->queuedata = lo; + lo->lo_queue = lo->lo_disk->queue; blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS); @@ -2134,11 +2359,6 @@ static int loop_add(struct loop_device **l, int i) */ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); - err = -ENOMEM; - disk = lo->lo_disk = alloc_disk(1 << part_shift); - if (!disk) - goto out_free_queue; - /* * Disable partition scanning by default. The in-kernel partition * scanning can be requested individually per-device during its @@ -2164,22 +2384,24 @@ static int loop_add(struct loop_device **l, int i) mutex_init(&lo->lo_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); + spin_lock_init(&lo->lo_work_lock); disk->major = LOOP_MAJOR; disk->first_minor = i << part_shift; + disk->minors = 1 << part_shift; disk->fops = &lo_fops; disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); add_disk(disk); - *l = lo; - return lo->lo_number; + mutex_unlock(&loop_ctl_mutex); + return i; -out_free_queue: - blk_cleanup_queue(lo->lo_queue); out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: idr_remove(&loop_index_idr, i); +out_unlock: + mutex_unlock(&loop_ctl_mutex); out_free_dev: kfree(lo); out: @@ -2189,116 +2411,92 @@ out: static void loop_remove(struct loop_device *lo) { del_gendisk(lo->lo_disk); - blk_cleanup_queue(lo->lo_queue); + blk_cleanup_disk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); - put_disk(lo->lo_disk); mutex_destroy(&lo->lo_mutex); kfree(lo); } -static int find_free_cb(int id, void *ptr, void *data) +static void loop_probe(dev_t dev) { - struct loop_device *lo = ptr; - struct loop_device **l = data; + int idx = MINOR(dev) >> part_shift; - if (lo->lo_state == Lo_unbound) { - *l = lo; - return 1; - } - return 0; + if (max_loop && idx >= max_loop) + return; + loop_add(idx); } -static int loop_lookup(struct loop_device **l, int i) +static int loop_control_remove(int idx) { struct loop_device *lo; - int ret = -ENODEV; + int ret; - if (i < 0) { - int err; + if (idx < 0) { + pr_warn("deleting an unspecified loop device is not supported.\n"); + return -EINVAL; + } + + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; - err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); - if (err == 1) { - *l = lo; - ret = lo->lo_number; - } - goto out; + lo = idr_find(&loop_index_idr, idx); + if (!lo) { + ret = -ENODEV; + goto out_unlock_ctrl; } - /* lookup and return a specific i */ - lo = idr_find(&loop_index_idr, i); - if (lo) { - *l = lo; - ret = lo->lo_number; + ret = mutex_lock_killable(&lo->lo_mutex); + if (ret) + goto out_unlock_ctrl; + if (lo->lo_state != Lo_unbound || + atomic_read(&lo->lo_refcnt) > 0) { + mutex_unlock(&lo->lo_mutex); + ret = -EBUSY; + goto out_unlock_ctrl; } -out: + lo->lo_state = Lo_deleting; + mutex_unlock(&lo->lo_mutex); + + idr_remove(&loop_index_idr, lo->lo_number); + loop_remove(lo); +out_unlock_ctrl: + mutex_unlock(&loop_ctl_mutex); return ret; } -static void loop_probe(dev_t dev) +static int loop_control_get_free(int idx) { - int idx = MINOR(dev) >> part_shift; struct loop_device *lo; + int id, ret; - if (max_loop && idx >= max_loop) - return; - - mutex_lock(&loop_ctl_mutex); - if (loop_lookup(&lo, idx) < 0) - loop_add(&lo, idx); + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; + idr_for_each_entry(&loop_index_idr, lo, id) { + if (lo->lo_state == Lo_unbound) + goto found; + } + mutex_unlock(&loop_ctl_mutex); + return loop_add(-1); +found: mutex_unlock(&loop_ctl_mutex); + return id; } static long loop_control_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { - struct loop_device *lo; - int ret; - - ret = mutex_lock_killable(&loop_ctl_mutex); - if (ret) - return ret; - - ret = -ENOSYS; switch (cmd) { case LOOP_CTL_ADD: - ret = loop_lookup(&lo, parm); - if (ret >= 0) { - ret = -EEXIST; - break; - } - ret = loop_add(&lo, parm); - break; + return loop_add(parm); case LOOP_CTL_REMOVE: - ret = loop_lookup(&lo, parm); - if (ret < 0) - break; - ret = mutex_lock_killable(&lo->lo_mutex); - if (ret) - break; - if (lo->lo_state != Lo_unbound) { - ret = -EBUSY; - mutex_unlock(&lo->lo_mutex); - break; - } - if (atomic_read(&lo->lo_refcnt) > 0) { - ret = -EBUSY; - mutex_unlock(&lo->lo_mutex); - break; - } - lo->lo_disk->private_data = NULL; - mutex_unlock(&lo->lo_mutex); - idr_remove(&loop_index_idr, lo->lo_number); - loop_remove(lo); - break; + return loop_control_remove(parm); case LOOP_CTL_GET_FREE: - ret = loop_lookup(&lo, -1); - if (ret >= 0) - break; - ret = loop_add(&lo, -1); + return loop_control_get_free(parm); + default: + return -ENOSYS; } - mutex_unlock(&loop_ctl_mutex); - - return ret; } static const struct file_operations loop_ctl_fops = { @@ -2321,7 +2519,6 @@ MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { int i, nr; - struct loop_device *lo; int err; part_shift = 0; @@ -2373,10 +2570,8 @@ static int __init loop_init(void) } /* pre-create number of devices given by config or max_loop */ - mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) - loop_add(&lo, i); - mutex_unlock(&loop_ctl_mutex); + loop_add(i); printk(KERN_INFO "loop: module loaded\n"); return 0; @@ -2387,26 +2582,20 @@ err_out: return err; } -static int loop_exit_cb(int id, void *ptr, void *data) -{ - struct loop_device *lo = ptr; - - loop_remove(lo); - return 0; -} - static void __exit loop_exit(void) { - mutex_lock(&loop_ctl_mutex); - - idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); - idr_destroy(&loop_index_idr); + struct loop_device *lo; + int id; unregister_blkdev(LOOP_MAJOR, "loop"); - misc_deregister(&loop_misc); + mutex_lock(&loop_ctl_mutex); + idr_for_each_entry(&loop_index_idr, lo, id) + loop_remove(lo); mutex_unlock(&loop_ctl_mutex); + + idr_destroy(&loop_index_idr); } module_init(loop_init); diff --git a/drivers/block/loop.h b/drivers/block/loop.h index a3c04f310672..1988899db63a 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -14,7 +14,6 @@ #include <linux/blk-mq.h> #include <linux/spinlock.h> #include <linux/mutex.h> -#include <linux/kthread.h> #include <uapi/linux/loop.h> /* Possible states of device */ @@ -22,6 +21,7 @@ enum { Lo_unbound, Lo_bound, Lo_rundown, + Lo_deleting, }; struct loop_func_table; @@ -54,8 +54,13 @@ struct loop_device { spinlock_t lo_lock; int lo_state; - struct kthread_worker worker; - struct task_struct *worker_task; + spinlock_t lo_work_lock; + struct workqueue_struct *workqueue; + struct work_struct rootcg_work; + struct list_head rootcg_cmd_list; + struct list_head idle_worker_list; + struct rb_root worker_tree; + struct timer_list timer; bool use_dio; bool sysfs_inited; @@ -66,13 +71,14 @@ struct loop_device { }; struct loop_cmd { - struct kthread_work work; + struct list_head list_entry; bool use_aio; /* use AIO interface to handle I/O */ atomic_t ref; /* only for aio */ long ret; struct kiocb iocb; struct bio_vec *bvec; - struct cgroup_subsys_state *css; + struct cgroup_subsys_state *blkcg_css; + struct cgroup_subsys_state *memcg_css; }; /* Support for loadable transfer modules */ diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 589cb0f1e030..901855717cb5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2160,6 +2160,20 @@ static ssize_t mtip_hw_show_status(struct device *dev, static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL); +static struct attribute *mtip_disk_attrs[] = { + &dev_attr_status.attr, + NULL, +}; + +static const struct attribute_group mtip_disk_attr_group = { + .attrs = mtip_disk_attrs, +}; + +static const struct attribute_group *mtip_disk_attr_groups[] = { + &mtip_disk_attr_group, + NULL, +}; + /* debugsfs entries */ static ssize_t show_device_status(struct device_driver *drv, char *buf) @@ -2238,7 +2252,6 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf) static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { - struct driver_data *dd = (struct driver_data *)f->private_data; int size = *offset; char *buf; int rv = 0; @@ -2247,11 +2260,8 @@ static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, return 0; buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); - if (!buf) { - dev_err(&dd->pdev->dev, - "Memory allocation: status buffer\n"); + if (!buf) return -ENOMEM; - } size += show_device_status(NULL, buf); @@ -2277,11 +2287,8 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, return 0; buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); - if (!buf) { - dev_err(&dd->pdev->dev, - "Memory allocation: register buffer\n"); + if (!buf) return -ENOMEM; - } size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); @@ -2343,11 +2350,8 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, return 0; buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); - if (!buf) { - dev_err(&dd->pdev->dev, - "Memory allocation: flag buffer\n"); + if (!buf) return -ENOMEM; - } size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", dd->port->flags); @@ -2384,47 +2388,6 @@ static const struct file_operations mtip_flags_fops = { .llseek = no_llseek, }; -/* - * Create the sysfs related attributes. - * - * @dd Pointer to the driver data structure. - * @kobj Pointer to the kobj for the block device. - * - * return value - * 0 Operation completed successfully. - * -EINVAL Invalid parameter. - */ -static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj) -{ - if (!kobj || !dd) - return -EINVAL; - - if (sysfs_create_file(kobj, &dev_attr_status.attr)) - dev_warn(&dd->pdev->dev, - "Error creating 'status' sysfs entry\n"); - return 0; -} - -/* - * Remove the sysfs related attributes. - * - * @dd Pointer to the driver data structure. - * @kobj Pointer to the kobj for the block device. - * - * return value - * 0 Operation completed successfully. - * -EINVAL Invalid parameter. - */ -static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj) -{ - if (!kobj || !dd) - return -EINVAL; - - sysfs_remove_file(kobj, &dev_attr_status.attr); - - return 0; -} - static int mtip_hw_debugfs_init(struct driver_data *dd) { if (!dfs_parent) @@ -2884,11 +2847,8 @@ static int mtip_hw_init(struct driver_data *dd) dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL, dd->numa_node); - if (!dd->port) { - dev_err(&dd->pdev->dev, - "Memory allocation: port structure\n"); + if (!dd->port) return -ENOMEM; - } /* Continue workqueue setup */ for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) @@ -3579,7 +3539,6 @@ static int mtip_block_initialize(struct driver_data *dd) int rv = 0, wait_for_rebuild = 0; sector_t capacity; unsigned int index = 0; - struct kobject *kobj; if (dd->disk) goto skip_create_disk; /* hw init done, before rebuild */ @@ -3589,35 +3548,6 @@ static int mtip_block_initialize(struct driver_data *dd) goto protocol_init_error; } - dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node); - if (dd->disk == NULL) { - dev_err(&dd->pdev->dev, - "Unable to allocate gendisk structure\n"); - rv = -EINVAL; - goto alloc_disk_error; - } - - rv = ida_alloc(&rssd_index_ida, GFP_KERNEL); - if (rv < 0) - goto ida_get_error; - index = rv; - - rv = rssd_disk_name_format("rssd", - index, - dd->disk->disk_name, - DISK_NAME_LEN); - if (rv) - goto disk_index_error; - - dd->disk->major = dd->major; - dd->disk->first_minor = index * MTIP_MAX_MINORS; - dd->disk->minors = MTIP_MAX_MINORS; - dd->disk->fops = &mtip_block_ops; - dd->disk->private_data = dd; - dd->index = index; - - mtip_hw_debugfs_init(dd); - memset(&dd->tags, 0, sizeof(dd->tags)); dd->tags.ops = &mtip_mq_ops; dd->tags.nr_hw_queues = 1; @@ -3636,17 +3566,35 @@ static int mtip_block_initialize(struct driver_data *dd) goto block_queue_alloc_tag_error; } - /* Allocate the request queue. */ - dd->queue = blk_mq_init_queue(&dd->tags); - if (IS_ERR(dd->queue)) { + dd->disk = blk_mq_alloc_disk(&dd->tags, dd); + if (IS_ERR(dd->disk)) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); rv = -ENOMEM; goto block_queue_alloc_init_error; } + dd->queue = dd->disk->queue; + + rv = ida_alloc(&rssd_index_ida, GFP_KERNEL); + if (rv < 0) + goto ida_get_error; + index = rv; + + rv = rssd_disk_name_format("rssd", + index, + dd->disk->disk_name, + DISK_NAME_LEN); + if (rv) + goto disk_index_error; + + dd->disk->major = dd->major; + dd->disk->first_minor = index * MTIP_MAX_MINORS; + dd->disk->minors = MTIP_MAX_MINORS; + dd->disk->fops = &mtip_block_ops; + dd->disk->private_data = dd; + dd->index = index; - dd->disk->queue = dd->queue; - dd->queue->queuedata = dd; + mtip_hw_debugfs_init(dd); skip_create_disk: /* Initialize the protocol layer. */ @@ -3685,17 +3633,7 @@ skip_create_disk: set_capacity(dd->disk, capacity); /* Enable the block device and add it to /dev */ - device_add_disk(&dd->pdev->dev, dd->disk, NULL); - - /* - * Now that the disk is active, initialize any sysfs attributes - * managed by the protocol layer. - */ - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_init(dd, kobj); - kobject_put(kobj); - } + device_add_disk(&dd->pdev->dev, dd->disk, mtip_disk_attr_groups); if (dd->mtip_svc_handler) { set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); @@ -3722,23 +3660,17 @@ start_service_thread: kthread_run_error: /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); - read_capacity_error: init_hw_cmds_error: - blk_cleanup_queue(dd->queue); -block_queue_alloc_init_error: - blk_mq_free_tag_set(&dd->tags); -block_queue_alloc_tag_error: mtip_hw_debugfs_exit(dd); disk_index_error: ida_free(&rssd_index_ida, index); - ida_get_error: - put_disk(dd->disk); - -alloc_disk_error: + blk_cleanup_disk(dd->disk); +block_queue_alloc_init_error: + blk_mq_free_tag_set(&dd->tags); +block_queue_alloc_tag_error: mtip_hw_exit(dd); /* De-initialize the protocol layer. */ - protocol_init_error: return rv; } @@ -3764,8 +3696,6 @@ static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) */ static int mtip_block_remove(struct driver_data *dd) { - struct kobject *kobj; - mtip_hw_debugfs_exit(dd); if (dd->mtip_svc_handler) { @@ -3774,15 +3704,6 @@ static int mtip_block_remove(struct driver_data *dd) kthread_stop(dd->mtip_svc_handler); } - /* Clean up the sysfs attributes, if created */ - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_exit(dd, kobj); - kobject_put(kobj); - } - } - if (!dd->sr) { /* * Explicitly wait here for IOs to quiesce, @@ -4002,11 +3923,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id()); dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); - if (dd == NULL) { - dev_err(&pdev->dev, - "Unable to allocate memory for driver data\n"); + if (!dd) return -ENOMEM; - } /* Attach the private data to this PCI device. */ pci_set_drvdata(pdev, dd); diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 47bdf324e962..c84be0028f63 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -74,7 +74,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos) n64cart_wait_dma(); - n64cart_write_reg(PI_DRAM_REG, dma_addr + bv->bv_offset); + n64cart_write_reg(PI_DRAM_REG, dma_addr); n64cart_write_reg(PI_CART_REG, (bstart | CART_DOMAIN) & CART_MAX); n64cart_write_reg(PI_WRITE_REG, bv->bv_len - 1); @@ -132,16 +132,12 @@ static int __init n64cart_probe(struct platform_device *pdev) if (!reg_base) return -EINVAL; - disk = alloc_disk(0); + disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) return -ENOMEM; - disk->queue = blk_alloc_queue(NUMA_NO_NODE); - if (!disk->queue) - return -ENOMEM; - disk->first_minor = 0; - disk->flags = GENHD_FL_NO_PART_SCAN | GENHD_FL_EXT_DEVT; + disk->flags = GENHD_FL_NO_PART_SCAN; disk->fops = &n64cart_fops; disk->private_data = &pdev->dev; strcpy(disk->disk_name, "n64cart"); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 45d2c28c8fc8..19f5d5a8b16a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -79,6 +79,7 @@ struct link_dead_args { #define NBD_RT_HAS_CONFIG_REF 4 #define NBD_RT_BOUND 5 #define NBD_RT_DISCONNECT_ON_CLOSE 6 +#define NBD_RT_HAS_BACKEND_FILE 7 #define NBD_DESTROY_ON_DISCONNECT 0 #define NBD_DISCONNECT_REQUESTED 1 @@ -119,6 +120,8 @@ struct nbd_device { struct completion *destroy_complete; unsigned long flags; + + char *backend; }; #define NBD_CMD_REQUEUED 1 @@ -216,18 +219,28 @@ static const struct device_attribute pid_attr = { .show = pid_show, }; +static ssize_t backend_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct nbd_device *nbd = (struct nbd_device *)disk->private_data; + + return sprintf(buf, "%s\n", nbd->backend ?: ""); +} + +static const struct device_attribute backend_attr = { + .attr = { .name = "backend", .mode = 0444}, + .show = backend_show, +}; + static void nbd_dev_remove(struct nbd_device *nbd) { struct gendisk *disk = nbd->disk; - struct request_queue *q; if (disk) { - q = disk->queue; del_gendisk(disk); - blk_cleanup_queue(q); + blk_cleanup_disk(disk); blk_mq_free_tag_set(&nbd->tag_set); - disk->private_data = NULL; - put_disk(disk); } /* @@ -805,6 +818,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); + /* don't abort one completed request */ + if (blk_mq_request_completed(req)) + return true; + mutex_lock(&cmd->lock); cmd->status = BLK_STS_IOERR; mutex_unlock(&cmd->lock); @@ -1215,6 +1232,12 @@ static void nbd_config_put(struct nbd_device *nbd) &config->runtime_flags)) device_remove_file(disk_to_dev(nbd->disk), &pid_attr); nbd->task_recv = NULL; + if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE, + &config->runtime_flags)) { + device_remove_file(disk_to_dev(nbd->disk), &backend_attr); + kfree(nbd->backend); + nbd->backend = NULL; + } nbd_clear_sock(nbd); if (config->num_connections) { int i; @@ -1274,7 +1297,7 @@ static int nbd_start_device(struct nbd_device *nbd) error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (error) { - dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + dev_err(disk_to_dev(nbd->disk), "device_create_file failed for pid!\n"); return error; } set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags); @@ -1646,15 +1669,25 @@ static int nbd_dev_add(int index) { struct nbd_device *nbd; struct gendisk *disk; - struct request_queue *q; int err = -ENOMEM; nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); if (!nbd) goto out; - disk = alloc_disk(1 << part_shift); - if (!disk) + nbd->tag_set.ops = &nbd_mq_ops; + nbd->tag_set.nr_hw_queues = 1; + nbd->tag_set.queue_depth = 128; + nbd->tag_set.numa_node = NUMA_NO_NODE; + nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); + nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | + BLK_MQ_F_BLOCKING; + nbd->tag_set.driver_data = nbd; + nbd->destroy_complete = NULL; + nbd->backend = NULL; + + err = blk_mq_alloc_tag_set(&nbd->tag_set); + if (err) goto out_free_nbd; if (index >= 0) { @@ -1668,30 +1701,15 @@ static int nbd_dev_add(int index) index = err; } if (err < 0) - goto out_free_disk; - + goto out_free_tags; nbd->index = index; - nbd->disk = disk; - nbd->tag_set.ops = &nbd_mq_ops; - nbd->tag_set.nr_hw_queues = 1; - nbd->tag_set.queue_depth = 128; - nbd->tag_set.numa_node = NUMA_NO_NODE; - nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); - nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_BLOCKING; - nbd->tag_set.driver_data = nbd; - nbd->destroy_complete = NULL; - err = blk_mq_alloc_tag_set(&nbd->tag_set); - if (err) + disk = blk_mq_alloc_disk(&nbd->tag_set, NULL); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_free_idr; - - q = blk_mq_init_queue(&nbd->tag_set); - if (IS_ERR(q)) { - err = PTR_ERR(q); - goto out_free_tags; } - disk->queue = q; + nbd->disk = disk; /* * Tell the block layer that we are not a rotational device @@ -1712,6 +1730,7 @@ static int nbd_dev_add(int index) INIT_LIST_HEAD(&nbd->list); disk->major = NBD_MAJOR; disk->first_minor = index << part_shift; + disk->minors = 1 << part_shift; disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); @@ -1719,12 +1738,10 @@ static int nbd_dev_add(int index) nbd_total_devices++; return index; -out_free_tags: - blk_mq_free_tag_set(&nbd->tag_set); out_free_idr: idr_remove(&nbd_index_idr, index); -out_free_disk: - put_disk(disk); +out_free_tags: + blk_mq_free_tag_set(&nbd->tag_set); out_free_nbd: kfree(nbd); out: @@ -1754,6 +1771,7 @@ static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, + [NBD_ATTR_BACKEND_IDENTIFIER] = { .type = NLA_STRING}, }; static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { @@ -1956,6 +1974,23 @@ again: } } ret = nbd_start_device(nbd); + if (ret) + goto out; + if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) { + nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER], + GFP_KERNEL); + if (!nbd->backend) { + ret = -ENOMEM; + goto out; + } + } + ret = device_create_file(disk_to_dev(nbd->disk), &backend_attr); + if (ret) { + dev_err(disk_to_dev(nbd->disk), + "device_create_file failed for backend!\n"); + goto out; + } + set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags); out: mutex_unlock(&nbd->config_lock); if (!ret) { @@ -1973,15 +2008,19 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) { mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); - nbd_clear_sock(nbd); - mutex_unlock(&nbd->config_lock); + sock_shutdown(nbd); /* * Make sure recv thread has finished, so it does not drop the last * config ref and try to destroy the workqueue from inside the work - * queue. + * queue. And this also ensure that we can safely call nbd_clear_que() + * to cancel the inflight I/Os. */ if (nbd->recv_workq) flush_workqueue(nbd->recv_workq); + nbd_clear_que(nbd); + nbd->task_setup = NULL; + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); @@ -2048,6 +2087,22 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) index); return -EINVAL; } + if (nbd->backend) { + if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) { + if (nla_strcmp(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER], + nbd->backend)) { + mutex_unlock(&nbd_index_mutex); + dev_err(nbd_to_dev(nbd), + "backend image doesn't match with %s\n", + nbd->backend); + return -EINVAL; + } + } else { + mutex_unlock(&nbd_index_mutex); + dev_err(nbd_to_dev(nbd), "must specify backend\n"); + return -EINVAL; + } + } if (!refcount_inc_not_zero(&nbd->refs)) { mutex_unlock(&nbd_index_mutex); printk(KERN_ERR "nbd: device at index %d is going down\n", diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 5f006d9e1472..d734e9ee1546 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1597,11 +1597,10 @@ static void null_del_dev(struct nullb *nullb) null_restart_queue_async(nullb); } - blk_cleanup_queue(nullb->q); + blk_cleanup_disk(nullb->disk); if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); - put_disk(nullb->disk); cleanup_queues(nullb); if (null_cache_active(nullb)) null_free_device_storage(nullb->dev, true); @@ -1700,22 +1699,19 @@ static int init_driver_queues(struct nullb *nullb) static int null_gendisk_register(struct nullb *nullb) { sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; - struct gendisk *disk; + struct gendisk *disk = nullb->disk; - disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); - if (!disk) - return -ENOMEM; set_capacity(disk, size); disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; disk->major = null_major; disk->first_minor = nullb->index; + disk->minors = 1; if (queue_is_mq(nullb->q)) disk->fops = &null_rq_ops; else disk->fops = &null_bio_ops; disk->private_data = nullb; - disk->queue = nullb->q; strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); if (nullb->dev->zoned) { @@ -1851,23 +1847,25 @@ static int null_add_dev(struct nullb_device *dev) goto out_cleanup_queues; if (!null_setup_fault()) - goto out_cleanup_queues; + goto out_cleanup_tags; nullb->tag_set->timeout = 5 * HZ; - nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb); - if (IS_ERR(nullb->q)) { - rv = -ENOMEM; + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); + if (IS_ERR(nullb->disk)) { + rv = PTR_ERR(nullb->disk); goto out_cleanup_tags; } + nullb->q = nullb->disk->queue; } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue(dev->home_node); - if (!nullb->q) { - rv = -ENOMEM; + rv = -ENOMEM; + nullb->disk = blk_alloc_disk(nullb->dev->home_node); + if (!nullb->disk) goto out_cleanup_queues; - } + + nullb->q = nullb->disk->queue; rv = init_driver_queues(nullb); if (rv) - goto out_cleanup_blk_queue; + goto out_cleanup_disk; } if (dev->mbps) { @@ -1883,7 +1881,7 @@ static int null_add_dev(struct nullb_device *dev) if (dev->zoned) { rv = null_init_zoned_dev(dev, nullb->q); if (rv) - goto out_cleanup_blk_queue; + goto out_cleanup_disk; } nullb->q->queuedata = nullb; @@ -1921,8 +1919,8 @@ static int null_add_dev(struct nullb_device *dev) return 0; out_cleanup_zone: null_free_zoned_dev(dev); -out_cleanup_blk_queue: - blk_cleanup_queue(nullb->q); +out_cleanup_disk: + blk_cleanup_disk(nullb->disk); out_cleanup_tags: if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 70da8b86ce58..f9cdd11f02f5 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -309,21 +309,19 @@ static void pcd_init_units(void) pcd_drive_count = 0; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - struct gendisk *disk = alloc_disk(1); + struct gendisk *disk; - if (!disk) + if (blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE)) continue; - disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops, - 1, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disk->queue)) { - disk->queue = NULL; - put_disk(disk); + disk = blk_mq_alloc_disk(&cd->tag_set, cd); + if (IS_ERR(disk)) { + blk_mq_free_tag_set(&cd->tag_set); continue; } INIT_LIST_HEAD(&cd->rq_list); - disk->queue->queuedata = cd; blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); cd->disk = disk; cd->pi = &cd->pia; @@ -343,6 +341,7 @@ static void pcd_init_units(void) cd->info.mask = 0; disk->major = major; disk->first_minor = unit; + disk->minors = 1; strcpy(disk->disk_name, cd->name); /* umm... */ disk->fops = &pcd_bdops; disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; @@ -759,10 +758,8 @@ static int pcd_detect(void) for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { if (!cd->disk) continue; - blk_cleanup_queue(cd->disk->queue); - cd->disk->queue = NULL; + blk_cleanup_disk(cd->disk); blk_mq_free_tag_set(&cd->tag_set); - put_disk(cd->disk); } pi_unregister_driver(par_drv); return -1; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 828a45ffe0e7..9b3298926356 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -879,18 +879,6 @@ static void pd_probe_drive(struct pd_unit *disk) { struct gendisk *p; - p = alloc_disk(1 << PD_BITS); - if (!p) - return; - - strcpy(p->disk_name, disk->name); - p->fops = &pd_fops; - p->major = major; - p->first_minor = (disk - pd) << PD_BITS; - p->events = DISK_EVENT_MEDIA_CHANGE; - disk->gd = p; - p->private_data = disk; - memset(&disk->tag_set, 0, sizeof(disk->tag_set)); disk->tag_set.ops = &pd_mq_ops; disk->tag_set.cmd_size = sizeof(struct pd_req); @@ -903,14 +891,21 @@ static void pd_probe_drive(struct pd_unit *disk) if (blk_mq_alloc_tag_set(&disk->tag_set)) return; - p->queue = blk_mq_init_queue(&disk->tag_set); - if (IS_ERR(p->queue)) { + p = blk_mq_alloc_disk(&disk->tag_set, disk); + if (!p) { blk_mq_free_tag_set(&disk->tag_set); - p->queue = NULL; return; } + disk->gd = p; + + strcpy(p->disk_name, disk->name); + p->fops = &pd_fops; + p->major = major; + p->first_minor = (disk - pd) << PD_BITS; + p->minors = 1 << PD_BITS; + p->events = DISK_EVENT_MEDIA_CHANGE; + p->private_data = disk; - p->queue->queuedata = disk; blk_queue_max_hw_sectors(p->queue, cluster); blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH); @@ -1019,9 +1014,8 @@ static void __exit pd_exit(void) if (p) { disk->gd = NULL; del_gendisk(p); - blk_cleanup_queue(p->queue); + blk_cleanup_disk(p); blk_mq_free_tag_set(&disk->tag_set); - put_disk(p); pi_release(disk->pi); } } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index bb09f21ce21a..d5b9c88ba76f 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -294,20 +294,17 @@ static void __init pf_init_units(void) for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) { struct gendisk *disk; - disk = alloc_disk(1); - if (!disk) + if (blk_mq_alloc_sq_tag_set(&pf->tag_set, &pf_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE)) continue; - disk->queue = blk_mq_init_sq_queue(&pf->tag_set, &pf_mq_ops, - 1, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disk->queue)) { - disk->queue = NULL; - put_disk(disk); + disk = blk_mq_alloc_disk(&pf->tag_set, pf); + if (IS_ERR(disk)) { + blk_mq_free_tag_set(&pf->tag_set); continue; } INIT_LIST_HEAD(&pf->rq_list); - disk->queue->queuedata = pf; blk_queue_max_segments(disk->queue, cluster); blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); pf->disk = disk; @@ -318,6 +315,7 @@ static void __init pf_init_units(void) snprintf(pf->name, PF_NAMELEN, "%s%d", name, unit); disk->major = major; disk->first_minor = unit; + disk->minors = 1; strcpy(disk->disk_name, pf->name); disk->fops = &pf_fops; disk->events = DISK_EVENT_MEDIA_CHANGE; @@ -766,10 +764,8 @@ static int pf_detect(void) for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { if (!pf->disk) continue; - blk_cleanup_queue(pf->disk->queue); - pf->disk->queue = NULL; + blk_cleanup_disk(pf->disk); blk_mq_free_tag_set(&pf->tag_set); - put_disk(pf->disk); } pi_unregister_driver(par_drv); return -1; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index bd3556585122..538446b652de 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,7 +704,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * int ret = 0; rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -2711,19 +2711,17 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->write_congestion_off = write_congestion_off; ret = -ENOMEM; - disk = alloc_disk(1); + disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) goto out_mem; pd->disk = disk; disk->major = pktdev_major; disk->first_minor = idx; + disk->minors = 1; disk->fops = &pktcdvd_ops; disk->flags = GENHD_FL_REMOVABLE; strcpy(disk->disk_name, pd->name); disk->private_data = pd; - disk->queue = blk_alloc_queue(NUMA_NO_NODE); - if (!disk->queue) - goto out_mem2; pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); @@ -2746,7 +2744,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) return 0; out_mem2: - put_disk(disk); + blk_cleanup_disk(disk); out_mem: mempool_exit(&pd->rb_pool); kfree(pd); @@ -2796,8 +2794,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_dbg(1, pd, "writer unmapped\n"); del_gendisk(pd->disk); - blk_cleanup_queue(pd->disk->queue); - put_disk(pd->disk); + blk_cleanup_disk(pd->disk); mempool_exit(&pd->rb_pool); kfree(pd); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index ba3ece56cbb3..f374ea2c67ce 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -29,7 +29,6 @@ struct ps3disk_private { spinlock_t lock; /* Request queue spinlock */ - struct request_queue *queue; struct blk_mq_tag_set tag_set; struct gendisk *gendisk; unsigned int blocking_factor; @@ -267,7 +266,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) blk_mq_end_request(req, error); spin_unlock(&priv->lock); - blk_mq_run_hw_queues(priv->queue, true); + blk_mq_run_hw_queues(priv->gendisk->queue, true); return IRQ_HANDLED; } @@ -441,17 +440,20 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) ps3disk_identify(dev); - queue = blk_mq_init_sq_queue(&priv->tag_set, &ps3disk_mq_ops, 1, + error = blk_mq_alloc_sq_tag_set(&priv->tag_set, &ps3disk_mq_ops, 1, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(queue)) { - dev_err(&dev->sbd.core, "%s:%u: blk_mq_init_queue failed\n", - __func__, __LINE__); - error = PTR_ERR(queue); + if (error) goto fail_teardown; + + gendisk = blk_mq_alloc_disk(&priv->tag_set, dev); + if (IS_ERR(gendisk)) { + dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n", + __func__, __LINE__); + error = PTR_ERR(gendisk); + goto fail_free_tag_set; } - priv->queue = queue; - queue->queuedata = dev; + queue = gendisk->queue; blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); blk_queue_dma_alignment(queue, dev->blk_size-1); @@ -462,19 +464,11 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_max_segments(queue, -1); blk_queue_max_segment_size(queue, dev->bounce_size); - gendisk = alloc_disk(PS3DISK_MINORS); - if (!gendisk) { - dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__, - __LINE__); - error = -ENOMEM; - goto fail_cleanup_queue; - } - priv->gendisk = gendisk; gendisk->major = ps3disk_major; gendisk->first_minor = devidx * PS3DISK_MINORS; + gendisk->minors = PS3DISK_MINORS; gendisk->fops = &ps3disk_fops; - gendisk->queue = queue; gendisk->private_data = dev; snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME, devidx+'a'); @@ -490,8 +484,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) device_add_disk(&dev->sbd.core, gendisk, NULL); return 0; -fail_cleanup_queue: - blk_cleanup_queue(queue); +fail_free_tag_set: blk_mq_free_tag_set(&priv->tag_set); fail_teardown: ps3stor_teardown(dev); @@ -517,9 +510,8 @@ static void ps3disk_remove(struct ps3_system_bus_device *_dev) &ps3disk_mask); mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); - blk_cleanup_queue(priv->queue); + blk_cleanup_disk(priv->gendisk); blk_mq_free_tag_set(&priv->tag_set); - put_disk(priv->gendisk); dev_notice(&dev->sbd.core, "Synchronizing disk cache\n"); ps3disk_sync_cache(dev); ps3stor_teardown(dev); diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1d738999fb69..7fbf469651c4 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -67,7 +67,6 @@ struct ps3vram_cache { }; struct ps3vram_priv { - struct request_queue *queue; struct gendisk *gendisk; u64 size; @@ -613,7 +612,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv; int error, status; - struct request_queue *queue; struct gendisk *gendisk; u64 ddr_size, ddr_lpar, ctrl_lpar, info_lpar, reports_lpar, reports_size, xdr_lpar; @@ -736,33 +734,23 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) ps3vram_proc_init(dev); - queue = blk_alloc_queue(NUMA_NO_NODE); - if (!queue) { - dev_err(&dev->core, "blk_alloc_queue failed\n"); - error = -ENOMEM; - goto out_cache_cleanup; - } - - priv->queue = queue; - blk_queue_max_segments(queue, BLK_MAX_SEGMENTS); - blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE); - blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS); - - gendisk = alloc_disk(1); + gendisk = blk_alloc_disk(NUMA_NO_NODE); if (!gendisk) { - dev_err(&dev->core, "alloc_disk failed\n"); + dev_err(&dev->core, "blk_alloc_disk failed\n"); error = -ENOMEM; - goto fail_cleanup_queue; + goto out_cache_cleanup; } priv->gendisk = gendisk; gendisk->major = ps3vram_major; - gendisk->first_minor = 0; + gendisk->minors = 1; gendisk->fops = &ps3vram_fops; - gendisk->queue = queue; gendisk->private_data = dev; strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name)); set_capacity(gendisk, priv->size >> 9); + blk_queue_max_segments(gendisk->queue, BLK_MAX_SEGMENTS); + blk_queue_max_segment_size(gendisk->queue, BLK_MAX_SEGMENT_SIZE); + blk_queue_max_hw_sectors(gendisk->queue, BLK_SAFE_MAX_SECTORS); dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); @@ -770,8 +758,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) device_add_disk(&dev->core, gendisk, NULL); return 0; -fail_cleanup_queue: - blk_cleanup_queue(queue); out_cache_cleanup: remove_proc_entry(DEVICE_NAME, NULL); ps3vram_cache_cleanup(dev); @@ -802,8 +788,7 @@ static void ps3vram_remove(struct ps3_system_bus_device *dev) struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); del_gendisk(priv->gendisk); - put_disk(priv->gendisk); - blk_cleanup_queue(priv->queue); + blk_cleanup_disk(priv->gendisk); remove_proc_entry(DEVICE_NAME, NULL); ps3vram_cache_cleanup(dev); iounmap(priv->reports); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bbb88eb009e0..90b947c96402 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4100,8 +4100,6 @@ again: static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) { - bool need_wait; - dout("%s rbd_dev %p\n", __func__, rbd_dev); lockdep_assert_held_write(&rbd_dev->lock_rwsem); @@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) */ rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; rbd_assert(!completion_done(&rbd_dev->releasing_wait)); - need_wait = !list_empty(&rbd_dev->running_list); - downgrade_write(&rbd_dev->lock_rwsem); - if (need_wait) - wait_for_completion(&rbd_dev->releasing_wait); - up_read(&rbd_dev->lock_rwsem); + if (list_empty(&rbd_dev->running_list)) + return true; + + up_write(&rbd_dev->lock_rwsem); + wait_for_completion(&rbd_dev->releasing_wait); down_write(&rbd_dev->lock_rwsem); if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) @@ -4203,15 +4201,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - /* - * we already know that the remote client is - * the owner - */ - up_write(&rbd_dev->lock_rwsem); - return; + dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", + __func__, rbd_dev, cid.gid, cid.handle); + } else { + rbd_set_owner_cid(rbd_dev, &cid); } - - rbd_set_owner_cid(rbd_dev, &cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4236,14 +4230,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n", + dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", __func__, rbd_dev, cid.gid, cid.handle, rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); - up_write(&rbd_dev->lock_rwsem); - return; + } else { + rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); } - - rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4750,9 +4742,8 @@ static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx, static void rbd_free_disk(struct rbd_device *rbd_dev) { - blk_cleanup_queue(rbd_dev->disk->queue); + blk_cleanup_disk(rbd_dev->disk); blk_mq_free_tag_set(&rbd_dev->tag_set); - put_disk(rbd_dev->disk); rbd_dev->disk = NULL; } @@ -4922,22 +4913,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; int err; - /* create gendisk info */ - disk = alloc_disk(single_major ? - (1 << RBD_SINGLE_MAJOR_PART_SHIFT) : - RBD_MINORS_PER_MAJOR); - if (!disk) - return -ENOMEM; - - snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", - rbd_dev->dev_id); - disk->major = rbd_dev->major; - disk->first_minor = rbd_dev->minor; - if (single_major) - disk->flags |= GENHD_FL_EXT_DEVT; - disk->fops = &rbd_bd_ops; - disk->private_data = rbd_dev; - memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); rbd_dev->tag_set.ops = &rbd_mq_ops; rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; @@ -4948,13 +4923,27 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); if (err) - goto out_disk; + return err; - q = blk_mq_init_queue(&rbd_dev->tag_set); - if (IS_ERR(q)) { - err = PTR_ERR(q); + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_tag_set; } + q = disk->queue; + + snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", + rbd_dev->dev_id); + disk->major = rbd_dev->major; + disk->first_minor = rbd_dev->minor; + if (single_major) { + disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT); + disk->flags |= GENHD_FL_EXT_DEVT; + } else { + disk->minors = RBD_MINORS_PER_MAJOR; + } + disk->fops = &rbd_bd_ops; + disk->private_data = rbd_dev; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ @@ -4976,21 +4965,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); - /* - * disk_release() expects a queue ref from add_disk() and will - * put it. Hold an extra ref until add_disk() is called. - */ - WARN_ON(!blk_get_queue(q)); - disk->queue = q; - q->queuedata = rbd_dev; - rbd_dev->disk = disk; return 0; out_tag_set: blk_mq_free_tag_set(&rbd_dev->tag_set); -out_disk: - put_disk(disk); return err; } @@ -7088,8 +7067,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_image_lock; device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); - /* see rbd_init_disk() */ - blk_put_queue(rbd_dev->disk->queue); spin_lock(&rbd_dev_list_lock); list_add_tail(&rbd_dev->node, &rbd_dev_list); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index c604a402cd5c..e9cc413495f0 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -92,7 +92,7 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, dev->fua = !!(rsp->cache_policy & RNBD_FUA); dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; - dev->max_segments = BMAX_SEGMENTS; + dev->max_segments = sess->max_segments; return 0; } @@ -1292,7 +1292,7 @@ find_and_get_or_create_sess(const char *sessname, sess->rtrs = rtrs_clt_open(&rtrs_ops, sessname, paths, path_cnt, port_nr, 0, /* Do not use pdu of rtrs */ - RECONNECT_DELAY, BMAX_SEGMENTS, + RECONNECT_DELAY, MAX_RECONNECTS, nr_poll_queues); if (IS_ERR(sess->rtrs)) { err = PTR_ERR(sess->rtrs); @@ -1306,6 +1306,7 @@ find_and_get_or_create_sess(const char *sessname, sess->max_io_size = attrs.max_io_size; sess->queue_depth = attrs.queue_depth; sess->nr_poll_queues = nr_poll_queues; + sess->max_segments = attrs.max_segments; err = setup_mq_tags(sess); if (err) @@ -1353,18 +1354,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev) } } -static int setup_mq_dev(struct rnbd_clt_dev *dev) -{ - dev->queue = blk_mq_init_queue(&dev->sess->tag_set); - if (IS_ERR(dev->queue)) { - rnbd_clt_err(dev, "Initializing multiqueue queue failed, err: %ld\n", - PTR_ERR(dev->queue)); - return PTR_ERR(dev->queue); - } - rnbd_init_mq_hw_queues(dev); - return 0; -} - static void setup_request_queue(struct rnbd_clt_dev *dev) { blk_queue_logical_block_size(dev->queue, dev->logical_block_size); @@ -1393,13 +1382,13 @@ static void setup_request_queue(struct rnbd_clt_dev *dev) blk_queue_io_opt(dev->queue, dev->sess->max_io_size); blk_queue_virt_boundary(dev->queue, SZ_4K - 1); blk_queue_write_cache(dev->queue, dev->wc, dev->fua); - dev->queue->queuedata = dev; } static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) { dev->gd->major = rnbd_client_major; dev->gd->first_minor = idx << RNBD_PART_BITS; + dev->gd->minors = 1 << RNBD_PART_BITS; dev->gd->fops = &rnbd_client_ops; dev->gd->queue = dev->queue; dev->gd->private_data = dev; @@ -1426,24 +1415,18 @@ static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) static int rnbd_client_setup_device(struct rnbd_clt_dev *dev) { - int err, idx = dev->clt_device_id; + int idx = dev->clt_device_id; dev->size = dev->nsectors * dev->logical_block_size; - err = setup_mq_dev(dev); - if (err) - return err; + dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev); + if (IS_ERR(dev->gd)) + return PTR_ERR(dev->gd); + dev->queue = dev->gd->queue; + rnbd_init_mq_hw_queues(dev); setup_request_queue(dev); - - dev->gd = alloc_disk_node(1 << RNBD_PART_BITS, NUMA_NO_NODE); - if (!dev->gd) { - blk_cleanup_queue(dev->queue); - return -ENOMEM; - } - rnbd_clt_setup_gen_disk(dev, idx); - return 0; } @@ -1650,8 +1633,7 @@ put_sess: static void destroy_gen_disk(struct rnbd_clt_dev *dev) { del_gendisk(dev->gd); - blk_cleanup_queue(dev->queue); - put_disk(dev->gd); + blk_cleanup_disk(dev->gd); } static void destroy_sysfs(struct rnbd_clt_dev *dev, diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index b5322c5aaac0..9ef8c4f306f2 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -20,10 +20,6 @@ #include "rnbd-proto.h" #include "rnbd-log.h" -/* Max. number of segments per IO request, Mellanox Connect X ~ Connect X5, - * choose minimial 30 for all, minus 1 for internal protocol, so 29. - */ -#define BMAX_SEGMENTS 29 /* time in seconds between reconnect tries, default to 30 s */ #define RECONNECT_DELAY 30 /* @@ -89,6 +85,7 @@ struct rnbd_clt_session { atomic_t busy; size_t queue_depth; u32 max_io_size; + u32 max_segments; struct blk_mq_tag_set tag_set; u32 nr_poll_queues; struct mutex lock; /* protects state and devs_list */ diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 9a28322a8cd8..1cc40b0ea761 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -236,47 +236,40 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) return -ENOMEM; } - card->queue = blk_alloc_queue(NUMA_NO_NODE); - if (!card->queue) { - dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); - unregister_blkdev(card->major, DRIVER_NAME); - return -ENOMEM; - } - - card->gendisk = alloc_disk(blkdev_minors); + card->gendisk = blk_alloc_disk(blkdev_minors); if (!card->gendisk) { dev_err(CARD_TO_DEV(card), "Failed disk alloc\n"); - blk_cleanup_queue(card->queue); unregister_blkdev(card->major, DRIVER_NAME); return -ENOMEM; } if (card->config_valid) { blk_size = card->config.data.block_size; - blk_queue_dma_alignment(card->queue, blk_size - 1); - blk_queue_logical_block_size(card->queue, blk_size); + blk_queue_dma_alignment(card->gendisk->queue, blk_size - 1); + blk_queue_logical_block_size(card->gendisk->queue, blk_size); } - blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); - blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); + blk_queue_max_hw_sectors(card->gendisk->queue, blkdev_max_hw_sectors); + blk_queue_physical_block_size(card->gendisk->queue, RSXX_HW_BLK_SIZE); - blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, card->gendisk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->gendisk->queue); if (rsxx_discard_supported(card)) { - blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue); - blk_queue_max_discard_sectors(card->queue, + blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->gendisk->queue); + blk_queue_max_discard_sectors(card->gendisk->queue, RSXX_HW_BLK_SIZE >> 9); - card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; - card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; + card->gendisk->queue->limits.discard_granularity = + RSXX_HW_BLK_SIZE; + card->gendisk->queue->limits.discard_alignment = + RSXX_HW_BLK_SIZE; } snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), "rsxx%d", card->disk_id); card->gendisk->major = card->major; - card->gendisk->first_minor = 0; + card->gendisk->minors = blkdev_minors; card->gendisk->fops = &rsxx_fops; card->gendisk->private_data = card; - card->gendisk->queue = card->queue; return 0; } @@ -286,10 +279,8 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card) if (!enable_blkdev) return; - put_disk(card->gendisk); + blk_cleanup_disk(card->gendisk); card->gendisk = NULL; - - blk_cleanup_queue(card->queue); unregister_blkdev(card->major, DRIVER_NAME); } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 0574f4495755..ed182f3dd054 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -74,9 +74,6 @@ struct dma_tracker { struct rsxx_dma *dma; }; -#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \ - (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS)) - struct dma_tracker_list { spinlock_t lock; int head; @@ -808,7 +805,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, memset(&ctrl->stats, 0, sizeof(ctrl->stats)); - ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); + ctrl->trackers = vmalloc(struct_size(ctrl->trackers, list, + RSXX_MAX_OUTSTANDING_CMDS)); if (!ctrl->trackers) return -ENOMEM; diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 6147977994ff..26c320c0d924 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -154,7 +154,6 @@ struct rsxx_cardinfo { bool bdev_attached; int disk_id; int major; - struct request_queue *queue; struct gendisk *gendisk; struct { /* Used to convert a byte address to a device address. */ diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 39aeebc6837d..4d4bb810c2ae 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -780,27 +780,6 @@ static const struct blk_mq_ops vdc_mq_ops = { .queue_rq = vdc_queue_rq, }; -static void cleanup_queue(struct request_queue *q) -{ - struct vdc_port *port = q->queuedata; - - blk_cleanup_queue(q); - blk_mq_free_tag_set(&port->tag_set); -} - -static struct request_queue *init_queue(struct vdc_port *port) -{ - struct request_queue *q; - - q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(q)) - return q; - - q->queuedata = port; - return q; -} - static int probe_disk(struct vdc_port *port) { struct request_queue *q; @@ -838,21 +817,21 @@ static int probe_disk(struct vdc_port *port) (u64)geom.num_sec); } - q = init_queue(port); - if (IS_ERR(q)) { - printk(KERN_ERR PFX "%s: Could not allocate queue.\n", - port->vio.name); - return PTR_ERR(q); - } - g = alloc_disk(1 << PARTITION_SHIFT); - if (!g) { + err = blk_mq_alloc_sq_tag_set(&port->tag_set, &vdc_mq_ops, + VDC_TX_RING_SIZE, BLK_MQ_F_SHOULD_MERGE); + if (err) + return err; + + g = blk_mq_alloc_disk(&port->tag_set, port); + if (IS_ERR(g)) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); - cleanup_queue(q); - return -ENOMEM; + blk_mq_free_tag_set(&port->tag_set); + return PTR_ERR(g); } port->disk = g; + q = g->queue; /* Each segment in a request is up to an aligned page in size. */ blk_queue_segment_boundary(q, PAGE_SIZE - 1); @@ -862,6 +841,7 @@ static int probe_disk(struct vdc_port *port) blk_queue_max_hw_sectors(q, port->max_xfer_size); g->major = vdc_major; g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT; + g->minors = 1 << PARTITION_SHIFT; strcpy(g->disk_name, port->disk_name); g->fops = &vdc_fops; @@ -1001,9 +981,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) } port = kzalloc(sizeof(*port), GFP_KERNEL); - err = -ENOMEM; if (!port) { - printk(KERN_ERR PFX "Cannot allocate vdc_port.\n"); + err = -ENOMEM; goto err_out_release_mdesc; } @@ -1071,7 +1050,7 @@ err_out_release_mdesc: return err; } -static int vdc_port_remove(struct vio_dev *vdev) +static void vdc_port_remove(struct vio_dev *vdev) { struct vdc_port *port = dev_get_drvdata(&vdev->dev); @@ -1083,9 +1062,8 @@ static int vdc_port_remove(struct vio_dev *vdev) del_timer_sync(&port->vio.timer); del_gendisk(port->disk); - cleanup_queue(port->disk->queue); - put_disk(port->disk); - port->disk = NULL; + blk_cleanup_disk(port->disk); + blk_mq_free_tag_set(&port->tag_set); vdc_free_tx_ring(port); vio_ldc_free(&port->vio); @@ -1094,7 +1072,6 @@ static int vdc_port_remove(struct vio_dev *vdev) kfree(port); } - return 0; } static void vdc_requeue_inflight(struct vdc_port *port) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 2917b21f48ff..7ccc8d2a41bc 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -800,23 +800,20 @@ static int swim_floppy_init(struct swim_priv *swd) spin_lock_init(&swd->lock); for (drive = 0; drive < swd->floppy_count; drive++) { - struct request_queue *q; - - swd->unit[drive].disk = alloc_disk(1); - if (swd->unit[drive].disk == NULL) { - err = -ENOMEM; + err = blk_mq_alloc_sq_tag_set(&swd->unit[drive].tag_set, + &swim_mq_ops, 2, BLK_MQ_F_SHOULD_MERGE); + if (err) goto exit_put_disks; - } - q = blk_mq_init_sq_queue(&swd->unit[drive].tag_set, &swim_mq_ops, - 2, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(q)) { - err = PTR_ERR(q); + swd->unit[drive].disk = + blk_mq_alloc_disk(&swd->unit[drive].tag_set, + &swd->unit[drive]); + if (IS_ERR(swd->unit[drive].disk)) { + blk_mq_free_tag_set(&swd->unit[drive].tag_set); + err = PTR_ERR(swd->unit[drive].disk); goto exit_put_disks; } - swd->unit[drive].disk->queue = q; - swd->unit[drive].disk->queue->queuedata = &swd->unit[drive]; swd->unit[drive].swd = swd; } @@ -824,6 +821,7 @@ static int swim_floppy_init(struct swim_priv *swd) swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE; swd->unit[drive].disk->major = FLOPPY_MAJOR; swd->unit[drive].disk->first_minor = drive; + swd->unit[drive].disk->minors = 1; sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive); swd->unit[drive].disk->fops = &floppy_fops; swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE; @@ -839,14 +837,10 @@ exit_put_disks: do { struct gendisk *disk = swd->unit[drive].disk; - if (disk) { - if (disk->queue) { - blk_cleanup_queue(disk->queue); - disk->queue = NULL; - } - blk_mq_free_tag_set(&swd->unit[drive].tag_set); - put_disk(disk); - } + if (!disk) + continue; + blk_cleanup_disk(disk); + blk_mq_free_tag_set(&swd->unit[drive].tag_set); } while (drive--); return err; } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index a515d0c1d2cb..965af0a3e95b 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1202,30 +1202,27 @@ static int swim3_attach(struct macio_dev *mdev, return rc; } - disk = alloc_disk(1); - if (disk == NULL) { - rc = -ENOMEM; - goto out_unregister; - } - fs = &floppy_states[floppy_count]; memset(fs, 0, sizeof(*fs)); - disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disk->queue)) { - rc = PTR_ERR(disk->queue); - disk->queue = NULL; - goto out_put_disk; + rc = blk_mq_alloc_sq_tag_set(&fs->tag_set, &swim3_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (rc) + goto out_unregister; + + disk = blk_mq_alloc_disk(&fs->tag_set, fs); + if (IS_ERR(disk)) { + rc = PTR_ERR(disk); + goto out_free_tag_set; } - disk->queue->queuedata = fs; rc = swim3_add_device(mdev, floppy_count); if (rc) - goto out_cleanup_queue; + goto out_cleanup_disk; disk->major = FLOPPY_MAJOR; disk->first_minor = floppy_count; + disk->minors = 1; disk->fops = &floppy_fops; disk->private_data = fs; disk->events = DISK_EVENT_MEDIA_CHANGE; @@ -1237,12 +1234,10 @@ static int swim3_attach(struct macio_dev *mdev, disks[floppy_count++] = disk; return 0; -out_cleanup_queue: - blk_cleanup_queue(disk->queue); - disk->queue = NULL; +out_cleanup_disk: + blk_cleanup_disk(disk); +out_free_tag_set: blk_mq_free_tag_set(&fs->tag_set); -out_put_disk: - put_disk(disk); out_unregister: if (floppy_count == 0) unregister_blkdev(FLOPPY_MAJOR, "fd"); diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 2cdf2771f8e8..7b54353ee92b 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1343,32 +1343,25 @@ static int carm_init_disk(struct carm_host *host, unsigned int port_no) { struct carm_port *port = &host->port[port_no]; struct gendisk *disk; - struct request_queue *q; port->host = host; port->port_no = port_no; - disk = alloc_disk(CARM_MINORS_PER_MAJOR); - if (!disk) - return -ENOMEM; + disk = blk_mq_alloc_disk(&host->tag_set, port); + if (IS_ERR(disk)) + return PTR_ERR(disk); port->disk = disk; sprintf(disk->disk_name, DRV_NAME "/%u", (unsigned int)host->id * CARM_MAX_PORTS + port_no); disk->major = host->major; disk->first_minor = port_no * CARM_MINORS_PER_MAJOR; + disk->minors = CARM_MINORS_PER_MAJOR; disk->fops = &carm_bd_ops; disk->private_data = port; - q = blk_mq_init_queue(&host->tag_set); - if (IS_ERR(q)) - return PTR_ERR(q); - - blk_queue_max_segments(q, CARM_MAX_REQ_SG); - blk_queue_segment_boundary(q, CARM_SG_BOUNDARY); - - q->queuedata = port; - disk->queue = q; + blk_queue_max_segments(disk->queue, CARM_MAX_REQ_SG); + blk_queue_segment_boundary(disk->queue, CARM_SG_BOUNDARY); return 0; } @@ -1382,9 +1375,7 @@ static void carm_free_disk(struct carm_host *host, unsigned int port_no) if (disk->flags & GENHD_FL_UP) del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); - put_disk(disk); + blk_cleanup_disk(disk); } static int carm_init_shm(struct carm_host *host) @@ -1429,8 +1420,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) { - printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n", - pci_name(pdev)); rc = -ENOMEM; goto err_out_regions; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b9fa3ef5b57c..afb37aac09e8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -21,6 +21,9 @@ #define VQ_NAME_LEN 16 #define MAX_DISCARD_SEGMENTS 256u +/* The maximum number of sg elements that fit into a virtqueue */ +#define VIRTIO_BLK_MAX_SG_ELEMS 32768 + static int major; static DEFINE_IDA(vd_index_ida); @@ -447,13 +450,6 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) /* Host must always specify the capacity. */ virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); - /* If capacity is too big, truncate with warning. */ - if ((sector_t)capacity != capacity) { - dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", - (unsigned long long)capacity); - capacity = (sector_t)-1; - } - nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); string_get_size(nblocks, queue_logical_block_size(q), @@ -696,6 +692,28 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); +static int virtblk_validate(struct virtio_device *vdev) +{ + u32 blk_size; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + if (!virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) + return 0; + + blk_size = virtio_cread32(vdev, + offsetof(struct virtio_blk_config, blk_size)); + + if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) + __virtio_clear_bit(vdev, VIRTIO_BLK_F_BLK_SIZE); + + return 0; +} + static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -707,12 +725,6 @@ static int virtblk_probe(struct virtio_device *vdev) u8 physical_block_exp, alignment_offset; unsigned int queue_depth; - if (!vdev->config->get) { - dev_err(&vdev->dev, "%s failure: config access disabled\n", - __func__); - return -EINVAL; - } - err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), GFP_KERNEL); if (err < 0) @@ -728,7 +740,10 @@ static int virtblk_probe(struct virtio_device *vdev) if (err || !sg_elems) sg_elems = 1; - /* We need an extra sg elements at head and tail. */ + /* Prevent integer overflows and honor max vq size */ + sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); + + /* We need extra sg elements at head and tail. */ sg_elems += 2; vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); if (!vblk) { @@ -749,13 +764,6 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_free_vblk; - /* FIXME: How many partitions? How long is a piece of string? */ - vblk->disk = alloc_disk(1 << PART_BITS); - if (!vblk->disk) { - err = -ENOMEM; - goto out_free_vq; - } - /* Default queue sizing is to fill the ring. */ if (likely(!virtblk_queue_depth)) { queue_depth = vblk->vqs[0].vq->num_free; @@ -779,21 +787,20 @@ static int virtblk_probe(struct virtio_device *vdev) err = blk_mq_alloc_tag_set(&vblk->tag_set); if (err) - goto out_put_disk; + goto out_free_vq; - q = blk_mq_init_queue(&vblk->tag_set); - if (IS_ERR(q)) { - err = -ENOMEM; + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk); + if (IS_ERR(vblk->disk)) { + err = PTR_ERR(vblk->disk); goto out_free_tags; } - vblk->disk->queue = q; - - q->queuedata = vblk; + q = vblk->disk->queue; virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); vblk->disk->major = major; vblk->disk->first_minor = index_to_minor(index); + vblk->disk->minors = 1 << PART_BITS; vblk->disk->private_data = vblk; vblk->disk->fops = &virtblk_fops; vblk->disk->flags |= GENHD_FL_EXT_DEVT; @@ -832,6 +839,14 @@ static int virtblk_probe(struct virtio_device *vdev) else blk_size = queue_logical_block_size(q); + if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) { + dev_err(&vdev->dev, + "block size is changed unexpectedly, now is %u\n", + blk_size); + err = -EINVAL; + goto err_cleanup_disk; + } + /* Use topology information if available */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp, @@ -890,10 +905,10 @@ static int virtblk_probe(struct virtio_device *vdev) device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; +err_cleanup_disk: + blk_cleanup_disk(vblk->disk); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); -out_put_disk: - put_disk(vblk->disk); out_free_vq: vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -913,8 +928,7 @@ static void virtblk_remove(struct virtio_device *vdev) flush_work(&vblk->config_work); del_gendisk(vblk->disk); - blk_cleanup_queue(vblk->disk->queue); - + blk_cleanup_disk(vblk->disk); blk_mq_free_tag_set(&vblk->tag_set); mutex_lock(&vblk->vdev_mutex); @@ -925,7 +939,6 @@ static void virtblk_remove(struct virtio_device *vdev) /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ vblk->vdev = NULL; - put_disk(vblk->disk); vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -948,6 +961,8 @@ static int virtblk_freeze(struct virtio_device *vdev) blk_mq_quiesce_queue(vblk->disk->queue); vdev->config->del_vqs(vdev); + kfree(vblk->vqs); + return 0; } @@ -994,6 +1009,7 @@ static struct virtio_driver virtio_blk = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtblk_validate, .probe = virtblk_probe, .remove = virtblk_remove, .config_changed = virtblk_config_changed, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 10df39a8b18d..d83fee21f6c5 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -502,34 +502,21 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) static int blkif_ioctl(struct block_device *bdev, fmode_t mode, unsigned command, unsigned long argument) { - struct blkfront_info *info = bdev->bd_disk->private_data; int i; - dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", - command, (long)argument); - switch (command) { case CDROMMULTISESSION: - dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); for (i = 0; i < sizeof(struct cdrom_multisession); i++) if (put_user(0, (char __user *)(argument + i))) return -EFAULT; return 0; - - case CDROM_GET_CAPABILITY: { - struct gendisk *gd = info->gd; - if (gd->flags & GENHD_FL_CD) + case CDROM_GET_CAPABILITY: + if (bdev->bd_disk->flags & GENHD_FL_CD) return 0; return -EINVAL; - } - default: - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ - return -EINVAL; /* same return as native Linux */ + return -EINVAL; } - - return 0; } static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, @@ -968,48 +955,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info) blk_queue_dma_alignment(rq, 511); } -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, - unsigned int physical_sector_size) -{ - struct request_queue *rq; - struct blkfront_info *info = gd->private_data; - - memset(&info->tag_set, 0, sizeof(info->tag_set)); - info->tag_set.ops = &blkfront_mq_ops; - info->tag_set.nr_hw_queues = info->nr_rings; - if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) { - /* - * When indirect descriptior is not supported, the I/O request - * will be split between multiple request in the ring. - * To avoid problems when sending the request, divide by - * 2 the depth of the queue. - */ - info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2; - } else - info->tag_set.queue_depth = BLK_RING_SIZE(info); - info->tag_set.numa_node = NUMA_NO_NODE; - info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - info->tag_set.cmd_size = sizeof(struct blkif_req); - info->tag_set.driver_data = info; - - if (blk_mq_alloc_tag_set(&info->tag_set)) - return -EINVAL; - rq = blk_mq_init_queue(&info->tag_set); - if (IS_ERR(rq)) { - blk_mq_free_tag_set(&info->tag_set); - return PTR_ERR(rq); - } - - rq->queuedata = info; - info->rq = gd->queue = rq; - info->gd = gd; - info->sector_size = sector_size; - info->physical_sector_size = physical_sector_size; - blkif_set_queue_limits(info); - - return 0; -} - static const char *flush_info(struct blkfront_info *info) { if (info->feature_flush && info->feature_fua) @@ -1146,12 +1091,36 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, err = xlbd_reserve_minors(minor, nr_minors); if (err) - goto out; + return err; err = -ENODEV; - gd = alloc_disk(nr_minors); - if (gd == NULL) - goto release; + memset(&info->tag_set, 0, sizeof(info->tag_set)); + info->tag_set.ops = &blkfront_mq_ops; + info->tag_set.nr_hw_queues = info->nr_rings; + if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) { + /* + * When indirect descriptior is not supported, the I/O request + * will be split between multiple request in the ring. + * To avoid problems when sending the request, divide by + * 2 the depth of the queue. + */ + info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2; + } else + info->tag_set.queue_depth = BLK_RING_SIZE(info); + info->tag_set.numa_node = NUMA_NO_NODE; + info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + info->tag_set.cmd_size = sizeof(struct blkif_req); + info->tag_set.driver_data = info; + + err = blk_mq_alloc_tag_set(&info->tag_set); + if (err) + goto out_release_minors; + + gd = blk_mq_alloc_disk(&info->tag_set, info); + if (IS_ERR(gd)) { + err = PTR_ERR(gd); + goto out_free_tag_set; + } strcpy(gd->disk_name, DEV_NAME); ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); @@ -1164,14 +1133,16 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->major = XENVBD_MAJOR; gd->first_minor = minor; + gd->minors = nr_minors; gd->fops = &xlvbd_block_fops; gd->private_data = info; set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { - del_gendisk(gd); - goto release; - } + info->rq = gd->queue; + info->gd = gd; + info->sector_size = sector_size; + info->physical_sector_size = physical_sector_size; + blkif_set_queue_limits(info); xlvbd_flush(info); @@ -1186,45 +1157,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, return 0; - release: +out_free_tag_set: + blk_mq_free_tag_set(&info->tag_set); +out_release_minors: xlbd_release_minors(minor, nr_minors); - out: return err; } -static void xlvbd_release_gendisk(struct blkfront_info *info) -{ - unsigned int minor, nr_minors, i; - struct blkfront_ring_info *rinfo; - - if (info->rq == NULL) - return; - - /* No more blkif_request(). */ - blk_mq_stop_hw_queues(info->rq); - - for_each_rinfo(info, rinfo, i) { - /* No more gnttab callback work. */ - gnttab_cancel_free_callback(&rinfo->callback); - - /* Flush gnttab callback work. Must be done with no locks held. */ - flush_work(&rinfo->work); - } - - del_gendisk(info->gd); - - minor = info->gd->first_minor; - nr_minors = info->gd->minors; - xlbd_release_minors(minor, nr_minors); - - blk_cleanup_queue(info->rq); - blk_mq_free_tag_set(&info->tag_set); - info->rq = NULL; - - put_disk(info->gd); - info->gd = NULL; -} - /* Already hold rinfo->ring_lock. */ static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo) { @@ -1774,12 +1713,6 @@ abort_transaction: return err; } -static void free_info(struct blkfront_info *info) -{ - list_del(&info->info_list); - kfree(info); -} - /* Common code used when first setting up, and when resuming. */ static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) @@ -1898,13 +1831,6 @@ again: xenbus_dev_fatal(dev, err, "%s", message); destroy_blkring: blkif_free(info, 0); - - mutex_lock(&blkfront_mutex); - free_info(info); - mutex_unlock(&blkfront_mutex); - - dev_set_drvdata(&dev->dev, NULL); - return err; } @@ -2144,38 +2070,26 @@ static int blkfront_resume(struct xenbus_device *dev) static void blkfront_closing(struct blkfront_info *info) { struct xenbus_device *xbdev = info->xbdev; - struct block_device *bdev = NULL; - - mutex_lock(&info->mutex); + struct blkfront_ring_info *rinfo; + unsigned int i; - if (xbdev->state == XenbusStateClosing) { - mutex_unlock(&info->mutex); + if (xbdev->state == XenbusStateClosing) return; - } - - if (info->gd) - bdev = bdgrab(info->gd->part0); - mutex_unlock(&info->mutex); - - if (!bdev) { - xenbus_frontend_closed(xbdev); - return; - } + /* No more blkif_request(). */ + blk_mq_stop_hw_queues(info->rq); + blk_set_queue_dying(info->rq); + set_capacity(info->gd, 0); - mutex_lock(&bdev->bd_mutex); + for_each_rinfo(info, rinfo, i) { + /* No more gnttab callback work. */ + gnttab_cancel_free_callback(&rinfo->callback); - if (bdev->bd_openers) { - xenbus_dev_error(xbdev, -EBUSY, - "Device in use; refusing to close"); - xenbus_switch_state(xbdev, XenbusStateClosing); - } else { - xlvbd_release_gendisk(info); - xenbus_frontend_closed(xbdev); + /* Flush gnttab callback work. Must be done with no locks held. */ + flush_work(&rinfo->work); } - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + xenbus_frontend_closed(xbdev); } static void blkfront_setup_discard(struct blkfront_info *info) @@ -2490,8 +2404,7 @@ static void blkback_changed(struct xenbus_device *dev, break; fallthrough; case XenbusStateClosing: - if (info) - blkfront_closing(info); + blkfront_closing(info); break; } } @@ -2499,56 +2412,21 @@ static void blkback_changed(struct xenbus_device *dev, static int blkfront_remove(struct xenbus_device *xbdev) { struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); - struct block_device *bdev = NULL; - struct gendisk *disk; dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); - if (!info) - return 0; - - blkif_free(info, 0); - - mutex_lock(&info->mutex); - - disk = info->gd; - if (disk) - bdev = bdgrab(disk->part0); - - info->xbdev = NULL; - mutex_unlock(&info->mutex); - - if (!bdev) { - mutex_lock(&blkfront_mutex); - free_info(info); - mutex_unlock(&blkfront_mutex); - return 0; - } - - /* - * The xbdev was removed before we reached the Closed - * state. See if it's safe to remove the disk. If the bdev - * isn't closed yet, we let release take care of it. - */ - - mutex_lock(&bdev->bd_mutex); - info = disk->private_data; - - dev_warn(disk_to_dev(disk), - "%s was hot-unplugged, %d stale handles\n", - xbdev->nodename, bdev->bd_openers); + del_gendisk(info->gd); - if (info && !bdev->bd_openers) { - xlvbd_release_gendisk(info); - disk->private_data = NULL; - mutex_lock(&blkfront_mutex); - free_info(info); - mutex_unlock(&blkfront_mutex); - } + mutex_lock(&blkfront_mutex); + list_del(&info->info_list); + mutex_unlock(&blkfront_mutex); - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + blkif_free(info, 0); + xlbd_release_minors(info->gd->first_minor, info->gd->minors); + blk_cleanup_disk(info->gd); + blk_mq_free_tag_set(&info->tag_set); + kfree(info); return 0; } @@ -2559,77 +2437,9 @@ static int blkfront_is_ready(struct xenbus_device *dev) return info->is_ready && info->xbdev; } -static int blkif_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct blkfront_info *info; - int err = 0; - - mutex_lock(&blkfront_mutex); - - info = disk->private_data; - if (!info) { - /* xbdev gone */ - err = -ERESTARTSYS; - goto out; - } - - mutex_lock(&info->mutex); - - if (!info->gd) - /* xbdev is closed */ - err = -ERESTARTSYS; - - mutex_unlock(&info->mutex); - -out: - mutex_unlock(&blkfront_mutex); - return err; -} - -static void blkif_release(struct gendisk *disk, fmode_t mode) -{ - struct blkfront_info *info = disk->private_data; - struct xenbus_device *xbdev; - - mutex_lock(&blkfront_mutex); - if (disk->part0->bd_openers) - goto out_mutex; - - /* - * Check if we have been instructed to close. We will have - * deferred this request, because the bdev was still open. - */ - - mutex_lock(&info->mutex); - xbdev = info->xbdev; - - if (xbdev && xbdev->state == XenbusStateClosing) { - /* pending switch to state closed */ - dev_info(disk_to_dev(disk), "releasing disk\n"); - xlvbd_release_gendisk(info); - xenbus_frontend_closed(info->xbdev); - } - - mutex_unlock(&info->mutex); - - if (!xbdev) { - /* sudden device removal */ - dev_info(disk_to_dev(disk), "releasing disk\n"); - xlvbd_release_gendisk(info); - disk->private_data = NULL; - free_info(info); - } - -out_mutex: - mutex_unlock(&blkfront_mutex); -} - static const struct block_device_operations xlvbd_block_fops = { .owner = THIS_MODULE, - .open = blkif_open, - .release = blkif_release, .getgeo = blkif_getgeo, .ioctl = blkif_ioctl, .compat_ioctl = blkdev_compat_ptr_ioctl, diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index c1d20818e649..4eef218108c6 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -236,11 +236,8 @@ static int z2_open(struct block_device *bdev, fmode_t mode) case Z2MINOR_Z2ONLY: z2ram_map = kmalloc(max_z2_map, GFP_KERNEL); - if (z2ram_map == NULL) { - printk(KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n"); + if (!z2ram_map) goto err_out; - } get_z2ram(); @@ -253,11 +250,8 @@ static int z2_open(struct block_device *bdev, fmode_t mode) case Z2MINOR_CHIPONLY: z2ram_map = kmalloc(max_chip_map, GFP_KERNEL); - if (z2ram_map == NULL) { - printk(KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n"); + if (!z2ram_map) goto err_out; - } get_chipram(); @@ -323,27 +317,20 @@ static const struct blk_mq_ops z2_mq_ops = { static int z2ram_register_disk(int minor) { - struct request_queue *q; struct gendisk *disk; - disk = alloc_disk(1); - if (!disk) - return -ENOMEM; - - q = blk_mq_init_queue(&tag_set); - if (IS_ERR(q)) { - put_disk(disk); - return PTR_ERR(q); - } + disk = blk_mq_alloc_disk(&tag_set, NULL); + if (IS_ERR(disk)) + return PTR_ERR(disk); disk->major = Z2RAM_MAJOR; disk->first_minor = minor; + disk->minors = 1; disk->fops = &z2_fops; if (minor) sprintf(disk->disk_name, "z2ram%d", minor); else sprintf(disk->disk_name, "z2ram"); - disk->queue = q; z2ram_gendisk[minor] = disk; add_disk(disk); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index cf8deecc39ef..fcaf2750f68f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1781,24 +1781,24 @@ static ssize_t reset_store(struct device *dev, zram = dev_to_zram(dev); bdev = zram->disk->part0; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); /* Do not reset an active device or claimed device */ if (bdev->bd_openers || zram->claim) { - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); return -EBUSY; } /* From now on, anyone can't open /dev/zram[0-9] */ zram->claim = true; - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); zram->claim = false; - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); return len; } @@ -1808,7 +1808,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode) int ret = 0; struct zram *zram; - WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); + WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex)); zram = bdev->bd_disk->private_data; /* zram was claimed to reset so open request fails */ @@ -1890,7 +1890,6 @@ static const struct attribute_group *zram_disk_attr_groups[] = { static int zram_add(void) { struct zram *zram; - struct request_queue *queue; int ret, device_id; zram = kzalloc(sizeof(struct zram), GFP_KERNEL); @@ -1906,27 +1905,20 @@ static int zram_add(void) #ifdef CONFIG_ZRAM_WRITEBACK spin_lock_init(&zram->wb_limit_lock); #endif - queue = blk_alloc_queue(NUMA_NO_NODE); - if (!queue) { - pr_err("Error allocating disk queue for device %d\n", - device_id); - ret = -ENOMEM; - goto out_free_idr; - } /* gendisk structure */ - zram->disk = alloc_disk(1); + zram->disk = blk_alloc_disk(NUMA_NO_NODE); if (!zram->disk) { pr_err("Error allocating disk structure for device %d\n", device_id); ret = -ENOMEM; - goto out_free_queue; + goto out_free_idr; } zram->disk->major = zram_major; zram->disk->first_minor = device_id; + zram->disk->minors = 1; zram->disk->fops = &zram_devops; - zram->disk->queue = queue; zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); @@ -1969,8 +1961,6 @@ static int zram_add(void) pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; -out_free_queue: - blk_cleanup_queue(queue); out_free_idr: idr_remove(&zram_index_idr, device_id); out_free_dev: @@ -1982,14 +1972,14 @@ static int zram_remove(struct zram *zram) { struct block_device *bdev = zram->disk->part0; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); if (bdev->bd_openers || zram->claim) { - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); return -EBUSY; } zram->claim = true; - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); zram_debugfs_unregister(zram); @@ -2000,8 +1990,7 @@ static int zram_remove(struct zram *zram) pr_info("Removed device: %s\n", zram->disk->disk_name); del_gendisk(zram->disk); - blk_cleanup_queue(zram->disk->queue); - put_disk(zram->disk); + blk_cleanup_disk(zram->disk); kfree(zram); return 0; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 419a7e8281ee..80c3b43b4828 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -112,9 +112,9 @@ struct zram { /* * zram is claimed so open request will be failed */ - bool claim; /* Protected by bdev->bd_mutex */ - struct file *backing_dev; + bool claim; /* Protected by disk->open_mutex */ #ifdef CONFIG_ZRAM_WRITEBACK + struct file *backing_dev; spinlock_t wb_limit_lock; bool wb_limit_enable; u64 bd_wb_limit; |