diff options
Diffstat (limited to 'drivers/block/loop.c')
-rw-r--r-- | drivers/block/loop.c | 617 |
1 files changed, 403 insertions, 214 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d58d68f3c7cd..f0cdff0c5fbf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -71,7 +71,6 @@ #include <linux/writeback.h> #include <linux/completion.h> #include <linux/highmem.h> -#include <linux/kthread.h> #include <linux/splice.h> #include <linux/sysfs.h> #include <linux/miscdevice.h> @@ -79,13 +78,57 @@ #include <linux/uio.h> #include <linux/ioprio.h> #include <linux/blk-cgroup.h> +#include <linux/sched/mm.h> #include "loop.h" #include <linux/uaccess.h> +#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) + static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. + */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -515,8 +558,6 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); - if (cmd->css) - css_put(cmd->css); cmd->ret = ret; lo_rw_aio_do_completion(cmd); } @@ -577,8 +618,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_complete = lo_rw_aio_complete; cmd->iocb.ki_flags = IOCB_DIRECT; cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); - if (cmd->css) - kthread_associate_blkcg(cmd->css); if (rw == WRITE) ret = call_write_iter(file, &cmd->iocb, &iter); @@ -586,7 +625,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, ret = call_read_iter(file, &cmd->iocb, &iter); lo_rw_aio_do_completion(cmd); - kthread_associate_blkcg(NULL); if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); @@ -647,14 +685,13 @@ static inline void loop_update_dio(struct loop_device *lo) lo->use_dio); } -static void loop_reread_partitions(struct loop_device *lo, - struct block_device *bdev) +static void loop_reread_partitions(struct loop_device *lo) { int rc; - mutex_lock(&bdev->bd_mutex); - rc = bdev_disk_changed(bdev, false); - mutex_unlock(&bdev->bd_mutex); + mutex_lock(&lo->lo_disk->open_mutex); + rc = bdev_disk_changed(lo->lo_disk, false); + mutex_unlock(&lo->lo_disk->open_mutex); if (rc) pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", __func__, lo->lo_number, lo->lo_file_name, rc); @@ -676,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). */ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -701,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -717,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -744,21 +783,30 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. + */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping - * the file ref can take bd_mutex which creates circular locking + * the file ref can take open_mutex which creates circular locking * dependency. */ fput(old_file); if (partscan) - loop_reread_partitions(lo, bdev); + loop_reread_partitions(lo); return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -921,27 +969,100 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_alignment = 0; } -static void loop_unprepare_queue(struct loop_device *lo) +struct loop_worker { + struct rb_node rb_node; + struct work_struct work; + struct list_head cmd_list; + struct list_head idle_list; + struct loop_device *lo; + struct cgroup_subsys_state *blkcg_css; + unsigned long last_ran_at; +}; + +static void loop_workfn(struct work_struct *work); +static void loop_rootcg_workfn(struct work_struct *work); +static void loop_free_idle_workers(struct timer_list *timer); + +#ifdef CONFIG_BLK_CGROUP +static inline int queue_on_root_worker(struct cgroup_subsys_state *css) { - kthread_flush_worker(&lo->worker); - kthread_stop(lo->worker_task); + return !css || css == blkcg_root_css; } - -static int loop_kthread_worker_fn(void *worker_ptr) +#else +static inline int queue_on_root_worker(struct cgroup_subsys_state *css) { - current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; - return kthread_worker_fn(worker_ptr); + return !css; } +#endif -static int loop_prepare_queue(struct loop_device *lo) +static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) { - kthread_init_worker(&lo->worker); - lo->worker_task = kthread_run(loop_kthread_worker_fn, - &lo->worker, "loop%d", lo->lo_number); - if (IS_ERR(lo->worker_task)) - return -ENOMEM; - set_user_nice(lo->worker_task, MIN_NICE); - return 0; + struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL; + struct loop_worker *cur_worker, *worker = NULL; + struct work_struct *work; + struct list_head *cmd_list; + + spin_lock_irq(&lo->lo_work_lock); + + if (queue_on_root_worker(cmd->blkcg_css)) + goto queue_work; + + node = &lo->worker_tree.rb_node; + + while (*node) { + parent = *node; + cur_worker = container_of(*node, struct loop_worker, rb_node); + if (cur_worker->blkcg_css == cmd->blkcg_css) { + worker = cur_worker; + break; + } else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) { + node = &(*node)->rb_left; + } else { + node = &(*node)->rb_right; + } + } + if (worker) + goto queue_work; + + worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN); + /* + * In the event we cannot allocate a worker, just queue on the + * rootcg worker and issue the I/O as the rootcg + */ + if (!worker) { + cmd->blkcg_css = NULL; + if (cmd->memcg_css) + css_put(cmd->memcg_css); + cmd->memcg_css = NULL; + goto queue_work; + } + + worker->blkcg_css = cmd->blkcg_css; + css_get(worker->blkcg_css); + INIT_WORK(&worker->work, loop_workfn); + INIT_LIST_HEAD(&worker->cmd_list); + INIT_LIST_HEAD(&worker->idle_list); + worker->lo = lo; + rb_link_node(&worker->rb_node, parent, node); + rb_insert_color(&worker->rb_node, &lo->worker_tree); +queue_work: + if (worker) { + /* + * We need to remove from the idle list here while + * holding the lock so that the idle timer doesn't + * free the worker + */ + if (!list_empty(&worker->idle_list)) + list_del_init(&worker->idle_list); + work = &worker->work; + cmd_list = &worker->cmd_list; + } else { + work = &lo->rootcg_work; + cmd_list = &lo->rootcg_cmd_list; + } + list_add_tail(&cmd->list_entry, cmd_list); + queue_work(lo->workqueue, work); + spin_unlock_irq(&lo->lo_work_lock); } static void loop_update_rotational(struct loop_device *lo) @@ -1067,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1093,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1127,12 +1248,23 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, !file->f_op->write_iter) lo->lo_flags |= LO_FLAGS_READ_ONLY; - error = loop_prepare_queue(lo); - if (error) + lo->workqueue = alloc_workqueue("loop%d", + WQ_UNBOUND | WQ_FREEZABLE, + 0, + lo->lo_number); + if (!lo->workqueue) { + error = -ENOMEM; goto out_unlock; + } set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); + INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn); + INIT_LIST_HEAD(&lo->rootcg_cmd_list); + INIT_LIST_HEAD(&lo->idle_worker_list); + lo->worker_tree = RB_ROOT; + timer_setup(&lo->timer, loop_free_idle_workers, + TIMER_DEFERRABLE); lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; lo->lo_backing_file = file; @@ -1154,6 +1286,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, blk_queue_physical_block_size(lo->lo_queue, bsize); blk_queue_io_min(lo->lo_queue, bsize); + loop_config_discard(lo); loop_update_rotational(lo); loop_update_dio(lo); loop_sysfs_init(lo); @@ -1161,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1172,21 +1308,20 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) - loop_reread_partitions(lo, bdev); + loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1200,6 +1335,19 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int err = 0; bool partscan = false; int lo_number; + struct loop_worker *pos, *worker; + + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { @@ -1219,6 +1367,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) /* freeze request queue during the transition */ blk_mq_freeze_queue(lo->lo_queue); + destroy_workqueue(lo->workqueue); + spin_lock_irq(&lo->lo_work_lock); + list_for_each_entry_safe(worker, pos, &lo->idle_worker_list, + idle_list) { + list_del(&worker->idle_list); + rb_erase(&worker->rb_node, &lo->worker_tree); + css_put(worker->blkcg_css); + kfree(worker); + } + spin_unlock_irq(&lo->lo_work_lock); + del_timer_sync(&lo->timer); + spin_lock_irq(&lo->lo_lock); lo->lo_backing_file = NULL; spin_unlock_irq(&lo->lo_lock); @@ -1255,12 +1415,11 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev; lo_number = lo->lo_number; - loop_unprepare_queue(lo); out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) { /* - * bd_mutex has been held already in release path, so don't + * open_mutex has been held already in release path, so don't * acquire it if this function is called in such case. * * If the reread partition isn't from release path, lo_refcnt @@ -1268,10 +1427,10 @@ out_unlock: * current holder is released. */ if (!release) - mutex_lock(&bdev->bd_mutex); - err = bdev_disk_changed(bdev, false); + mutex_lock(&lo->lo_disk->open_mutex); + err = bdev_disk_changed(lo->lo_disk, false); if (!release) - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&lo->lo_disk->open_mutex); if (err) pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", __func__, lo_number, err); @@ -1298,7 +1457,7 @@ out_unlock: /* * Need not hold lo_mutex to fput backing file. Calling fput holding * lo_mutex triggers a circular lock dependency possibility warning as - * fput can take bd_mutex which is usually taken before lo_mutex. + * fput can take open_mutex which is usually taken before lo_mutex. */ if (filp) fput(filp); @@ -1341,7 +1500,6 @@ static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { int err; - struct block_device *bdev; kuid_t uid = current_uid(); int prev_lo_flags; bool partscan = false; @@ -1410,13 +1568,12 @@ out_unfreeze: if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - bdev = lo->lo_device; partscan = true; } out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) - loop_reread_partitions(lo, bdev); + loop_reread_partitions(lo); return err; } @@ -1879,29 +2036,18 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { - struct loop_device *lo; + struct loop_device *lo = bdev->bd_disk->private_data; int err; - /* - * take loop_ctl_mutex to protect lo pointer from race with - * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention - * release it prior to updating lo->lo_refcnt. - */ - err = mutex_lock_killable(&loop_ctl_mutex); - if (err) - return err; - lo = bdev->bd_disk->private_data; - if (!lo) { - mutex_unlock(&loop_ctl_mutex); - return -ENXIO; - } err = mutex_lock_killable(&lo->lo_mutex); - mutex_unlock(&loop_ctl_mutex); if (err) return err; - atomic_inc(&lo->lo_refcnt); + if (lo->lo_state == Lo_deleting) + err = -ENXIO; + else + atomic_inc(&lo->lo_refcnt); mutex_unlock(&lo->lo_mutex); - return 0; + return err; } static void lo_release(struct gendisk *disk, fmode_t mode) @@ -2019,14 +2165,19 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, } /* always use the first bio's css */ + cmd->blkcg_css = NULL; + cmd->memcg_css = NULL; #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { - cmd->css = &bio_blkcg(rq->bio)->css; - css_get(cmd->css); - } else + if (rq->bio && rq->bio->bi_blkg) { + cmd->blkcg_css = &bio_blkcg(rq->bio)->css; +#ifdef CONFIG_MEMCG + cmd->memcg_css = + cgroup_get_e_css(cmd->blkcg_css->cgroup, + &memory_cgrp_subsys); +#endif + } #endif - cmd->css = NULL; - kthread_queue_work(&lo->worker, &cmd->work); + loop_queue_work(lo, cmd); return BLK_STS_OK; } @@ -2037,13 +2188,28 @@ static void loop_handle_cmd(struct loop_cmd *cmd) const bool write = op_is_write(req_op(rq)); struct loop_device *lo = rq->q->queuedata; int ret = 0; + struct mem_cgroup *old_memcg = NULL; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { ret = -EIO; goto failed; } + if (cmd->blkcg_css) + kthread_associate_blkcg(cmd->blkcg_css); + if (cmd->memcg_css) + old_memcg = set_active_memcg( + mem_cgroup_from_css(cmd->memcg_css)); + ret = do_req_filebacked(lo, rq); + + if (cmd->blkcg_css) + kthread_associate_blkcg(NULL); + + if (cmd->memcg_css) { + set_active_memcg(old_memcg); + css_put(cmd->memcg_css); + } failed: /* complete non-aio request */ if (!cmd->use_aio || ret) { @@ -2056,30 +2222,86 @@ static void loop_handle_cmd(struct loop_cmd *cmd) } } -static void loop_queue_work(struct kthread_work *work) +static void loop_set_timer(struct loop_device *lo) +{ + timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT); +} + +static void loop_process_work(struct loop_worker *worker, + struct list_head *cmd_list, struct loop_device *lo) +{ + int orig_flags = current->flags; + struct loop_cmd *cmd; + + current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; + spin_lock_irq(&lo->lo_work_lock); + while (!list_empty(cmd_list)) { + cmd = container_of( + cmd_list->next, struct loop_cmd, list_entry); + list_del(cmd_list->next); + spin_unlock_irq(&lo->lo_work_lock); + + loop_handle_cmd(cmd); + cond_resched(); + + spin_lock_irq(&lo->lo_work_lock); + } + + /* + * We only add to the idle list if there are no pending cmds + * *and* the worker will not run again which ensures that it + * is safe to free any worker on the idle list + */ + if (worker && !work_pending(&worker->work)) { + worker->last_ran_at = jiffies; + list_add_tail(&worker->idle_list, &lo->idle_worker_list); + loop_set_timer(lo); + } + spin_unlock_irq(&lo->lo_work_lock); + current->flags = orig_flags; +} + +static void loop_workfn(struct work_struct *work) { - struct loop_cmd *cmd = - container_of(work, struct loop_cmd, work); + struct loop_worker *worker = + container_of(work, struct loop_worker, work); + loop_process_work(worker, &worker->cmd_list, worker->lo); +} - loop_handle_cmd(cmd); +static void loop_rootcg_workfn(struct work_struct *work) +{ + struct loop_device *lo = + container_of(work, struct loop_device, rootcg_work); + loop_process_work(NULL, &lo->rootcg_cmd_list, lo); } -static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) +static void loop_free_idle_workers(struct timer_list *timer) { - struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct loop_device *lo = container_of(timer, struct loop_device, timer); + struct loop_worker *pos, *worker; - kthread_init_work(&cmd->work, loop_queue_work); - return 0; + spin_lock_irq(&lo->lo_work_lock); + list_for_each_entry_safe(worker, pos, &lo->idle_worker_list, + idle_list) { + if (time_is_after_jiffies(worker->last_ran_at + + LOOP_IDLE_WORKER_TIMEOUT)) + break; + list_del(&worker->idle_list); + rb_erase(&worker->rb_node, &lo->worker_tree); + css_put(worker->blkcg_css); + kfree(worker); + } + if (!list_empty(&lo->idle_worker_list)) + loop_set_timer(lo); + spin_unlock_irq(&lo->lo_work_lock); } static const struct blk_mq_ops loop_mq_ops = { .queue_rq = loop_queue_rq, - .init_request = loop_init_request, .complete = lo_complete_rq, }; -static int loop_add(struct loop_device **l, int i) +static int loop_add(int i) { struct loop_device *lo; struct gendisk *disk; @@ -2089,9 +2311,12 @@ static int loop_add(struct loop_device **l, int i) lo = kzalloc(sizeof(*lo), GFP_KERNEL); if (!lo) goto out; - lo->lo_state = Lo_unbound; + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + goto out_free_dev; + /* allocate id, if @id >= 0, we're requesting that specific id */ if (i >= 0) { err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); @@ -2101,7 +2326,7 @@ static int loop_add(struct loop_device **l, int i) err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); } if (err < 0) - goto out_free_dev; + goto out_unlock; i = err; err = -ENOMEM; @@ -2117,12 +2342,12 @@ static int loop_add(struct loop_device **l, int i) if (err) goto out_free_idr; - lo->lo_queue = blk_mq_init_queue(&lo->tag_set); - if (IS_ERR(lo->lo_queue)) { - err = PTR_ERR(lo->lo_queue); + disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_cleanup_tags; } - lo->lo_queue->queuedata = lo; + lo->lo_queue = lo->lo_disk->queue; blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS); @@ -2134,11 +2359,6 @@ static int loop_add(struct loop_device **l, int i) */ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); - err = -ENOMEM; - disk = lo->lo_disk = alloc_disk(1 << part_shift); - if (!disk) - goto out_free_queue; - /* * Disable partition scanning by default. The in-kernel partition * scanning can be requested individually per-device during its @@ -2164,22 +2384,24 @@ static int loop_add(struct loop_device **l, int i) mutex_init(&lo->lo_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); + spin_lock_init(&lo->lo_work_lock); disk->major = LOOP_MAJOR; disk->first_minor = i << part_shift; + disk->minors = 1 << part_shift; disk->fops = &lo_fops; disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); add_disk(disk); - *l = lo; - return lo->lo_number; + mutex_unlock(&loop_ctl_mutex); + return i; -out_free_queue: - blk_cleanup_queue(lo->lo_queue); out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: idr_remove(&loop_index_idr, i); +out_unlock: + mutex_unlock(&loop_ctl_mutex); out_free_dev: kfree(lo); out: @@ -2189,116 +2411,92 @@ out: static void loop_remove(struct loop_device *lo) { del_gendisk(lo->lo_disk); - blk_cleanup_queue(lo->lo_queue); + blk_cleanup_disk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); - put_disk(lo->lo_disk); mutex_destroy(&lo->lo_mutex); kfree(lo); } -static int find_free_cb(int id, void *ptr, void *data) +static void loop_probe(dev_t dev) { - struct loop_device *lo = ptr; - struct loop_device **l = data; + int idx = MINOR(dev) >> part_shift; - if (lo->lo_state == Lo_unbound) { - *l = lo; - return 1; - } - return 0; + if (max_loop && idx >= max_loop) + return; + loop_add(idx); } -static int loop_lookup(struct loop_device **l, int i) +static int loop_control_remove(int idx) { struct loop_device *lo; - int ret = -ENODEV; + int ret; - if (i < 0) { - int err; + if (idx < 0) { + pr_warn("deleting an unspecified loop device is not supported.\n"); + return -EINVAL; + } + + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; - err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); - if (err == 1) { - *l = lo; - ret = lo->lo_number; - } - goto out; + lo = idr_find(&loop_index_idr, idx); + if (!lo) { + ret = -ENODEV; + goto out_unlock_ctrl; } - /* lookup and return a specific i */ - lo = idr_find(&loop_index_idr, i); - if (lo) { - *l = lo; - ret = lo->lo_number; + ret = mutex_lock_killable(&lo->lo_mutex); + if (ret) + goto out_unlock_ctrl; + if (lo->lo_state != Lo_unbound || + atomic_read(&lo->lo_refcnt) > 0) { + mutex_unlock(&lo->lo_mutex); + ret = -EBUSY; + goto out_unlock_ctrl; } -out: + lo->lo_state = Lo_deleting; + mutex_unlock(&lo->lo_mutex); + + idr_remove(&loop_index_idr, lo->lo_number); + loop_remove(lo); +out_unlock_ctrl: + mutex_unlock(&loop_ctl_mutex); return ret; } -static void loop_probe(dev_t dev) +static int loop_control_get_free(int idx) { - int idx = MINOR(dev) >> part_shift; struct loop_device *lo; + int id, ret; - if (max_loop && idx >= max_loop) - return; - - mutex_lock(&loop_ctl_mutex); - if (loop_lookup(&lo, idx) < 0) - loop_add(&lo, idx); + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; + idr_for_each_entry(&loop_index_idr, lo, id) { + if (lo->lo_state == Lo_unbound) + goto found; + } + mutex_unlock(&loop_ctl_mutex); + return loop_add(-1); +found: mutex_unlock(&loop_ctl_mutex); + return id; } static long loop_control_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { - struct loop_device *lo; - int ret; - - ret = mutex_lock_killable(&loop_ctl_mutex); - if (ret) - return ret; - - ret = -ENOSYS; switch (cmd) { case LOOP_CTL_ADD: - ret = loop_lookup(&lo, parm); - if (ret >= 0) { - ret = -EEXIST; - break; - } - ret = loop_add(&lo, parm); - break; + return loop_add(parm); case LOOP_CTL_REMOVE: - ret = loop_lookup(&lo, parm); - if (ret < 0) - break; - ret = mutex_lock_killable(&lo->lo_mutex); - if (ret) - break; - if (lo->lo_state != Lo_unbound) { - ret = -EBUSY; - mutex_unlock(&lo->lo_mutex); - break; - } - if (atomic_read(&lo->lo_refcnt) > 0) { - ret = -EBUSY; - mutex_unlock(&lo->lo_mutex); - break; - } - lo->lo_disk->private_data = NULL; - mutex_unlock(&lo->lo_mutex); - idr_remove(&loop_index_idr, lo->lo_number); - loop_remove(lo); - break; + return loop_control_remove(parm); case LOOP_CTL_GET_FREE: - ret = loop_lookup(&lo, -1); - if (ret >= 0) - break; - ret = loop_add(&lo, -1); + return loop_control_get_free(parm); + default: + return -ENOSYS; } - mutex_unlock(&loop_ctl_mutex); - - return ret; } static const struct file_operations loop_ctl_fops = { @@ -2321,7 +2519,6 @@ MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { int i, nr; - struct loop_device *lo; int err; part_shift = 0; @@ -2373,10 +2570,8 @@ static int __init loop_init(void) } /* pre-create number of devices given by config or max_loop */ - mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) - loop_add(&lo, i); - mutex_unlock(&loop_ctl_mutex); + loop_add(i); printk(KERN_INFO "loop: module loaded\n"); return 0; @@ -2387,26 +2582,20 @@ err_out: return err; } -static int loop_exit_cb(int id, void *ptr, void *data) -{ - struct loop_device *lo = ptr; - - loop_remove(lo); - return 0; -} - static void __exit loop_exit(void) { - mutex_lock(&loop_ctl_mutex); - - idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); - idr_destroy(&loop_index_idr); + struct loop_device *lo; + int id; unregister_blkdev(LOOP_MAJOR, "loop"); - misc_deregister(&loop_misc); + mutex_lock(&loop_ctl_mutex); + idr_for_each_entry(&loop_index_idr, lo, id) + loop_remove(lo); mutex_unlock(&loop_ctl_mutex); + + idr_destroy(&loop_index_idr); } module_init(loop_init); |