diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 20 | ||||
-rw-r--r-- | block/badblocks.c | 6 | ||||
-rw-r--r-- | block/bdev.c | 260 | ||||
-rw-r--r-- | block/blk-cgroup.c | 13 | ||||
-rw-r--r-- | block/blk-cgroup.h | 2 | ||||
-rw-r--r-- | block/blk-core.c | 14 | ||||
-rw-r--r-- | block/blk-mq.c | 89 | ||||
-rw-r--r-- | block/blk-pm.c | 33 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/blk-throttle.c | 2 |
10 files changed, 261 insertions, 180 deletions
diff --git a/block/Kconfig b/block/Kconfig index 55ae2286a4de..1de4682d48cc 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -78,6 +78,26 @@ config BLK_DEV_INTEGRITY_T10 select CRC_T10DIF select CRC64_ROCKSOFT +config BLK_DEV_WRITE_MOUNTED + bool "Allow writing to mounted block devices" + default y + help + When a block device is mounted, writing to its buffer cache is very + likely going to cause filesystem corruption. It is also rather easy to + crash the kernel in this way since the filesystem has no practical way + of detecting these writes to buffer cache and verifying its metadata + integrity. However there are some setups that need this capability + like running fsck on read-only mounted root device, modifying some + features on mounted ext4 filesystem, and similar. If you say N, the + kernel will prevent processes from writing to block devices that are + mounted by filesystems which provides some more protection from runaway + privileged processes and generally makes it much harder to crash + filesystem drivers. Note however that this does not prevent + underlying device(s) from being modified by other means, e.g. by + directly submitting SCSI commands or through access to lower layers of + storage stack. If in doubt, say Y. The configuration can be overridden + with the bdev_allow_write_mounted boot option. + config BLK_DEV_ZONED bool "Zoned block device support" select MQ_IOSCHED_DEADLINE diff --git a/block/badblocks.c b/block/badblocks.c index fc92d4e18aa3..db4ec8b9b2a8 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -1312,12 +1312,14 @@ re_check: prev = prev_badblocks(bb, &bad, hint); /* start after all badblocks */ - if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) { + if ((prev >= 0) && + ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) { len = sectors; goto update_sectors; } - if (overlap_front(bb, prev, &bad)) { + /* Overlapped with front badblocks record */ + if ((prev >= 0) && overlap_front(bb, prev, &bad)) { if (BB_ACK(p[prev])) acked_badblocks++; else diff --git a/block/bdev.c b/block/bdev.c index e4cfb7adb645..e9f1b12bd75c 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -30,6 +30,9 @@ #include "../fs/internal.h" #include "blk.h" +/* Should we allow writing to mounted block devices? */ +static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED); + struct bdev_inode { struct block_device bdev; struct inode vfs_inode; @@ -207,85 +210,88 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend) EXPORT_SYMBOL(sync_blockdev_range); /** - * freeze_bdev - lock a filesystem and force it into a consistent state + * bdev_freeze - lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. * The reference counter (bd_fsfreeze_count) guarantees that only the last * unfreeze process can unfreeze the frozen filesystem actually when multiple - * freeze requests arrive simultaneously. It counts up in freeze_bdev() and - * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze + * freeze requests arrive simultaneously. It counts up in bdev_freeze() and + * count down in bdev_thaw(). When it becomes 0, thaw_bdev() will unfreeze * actually. + * + * Return: On success zero is returned, negative error code on failure. */ -int freeze_bdev(struct block_device *bdev) +int bdev_freeze(struct block_device *bdev) { - struct super_block *sb; int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (++bdev->bd_fsfreeze_count > 1) - goto done; - - sb = get_active_super(bdev); - if (!sb) - goto sync; - if (sb->s_op->freeze_super) - error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE); - else - error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); - deactivate_super(sb); - if (error) { - bdev->bd_fsfreeze_count--; - goto done; + if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return 0; } - bdev->bd_fsfreeze_sb = sb; -sync: - sync_blockdev(bdev); -done: + mutex_lock(&bdev->bd_holder_lock); + if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) { + error = bdev->bd_holder_ops->freeze(bdev); + lockdep_assert_not_held(&bdev->bd_holder_lock); + } else { + mutex_unlock(&bdev->bd_holder_lock); + error = sync_blockdev(bdev); + } + + if (error) + atomic_dec(&bdev->bd_fsfreeze_count); + mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } -EXPORT_SYMBOL(freeze_bdev); +EXPORT_SYMBOL(bdev_freeze); /** - * thaw_bdev - unlock filesystem + * bdev_thaw - unlock filesystem * @bdev: blockdevice to unlock * - * Unlocks the filesystem and marks it writeable again after freeze_bdev(). + * Unlocks the filesystem and marks it writeable again after bdev_freeze(). + * + * Return: On success zero is returned, negative error code on failure. */ -int thaw_bdev(struct block_device *bdev) +int bdev_thaw(struct block_device *bdev) { - struct super_block *sb; - int error = -EINVAL; + int error = -EINVAL, nr_freeze; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (!bdev->bd_fsfreeze_count) + + /* + * If this returns < 0 it means that @bd_fsfreeze_count was + * already 0 and no decrement was performed. + */ + nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count); + if (nr_freeze < 0) goto out; error = 0; - if (--bdev->bd_fsfreeze_count > 0) + if (nr_freeze > 0) goto out; - sb = bdev->bd_fsfreeze_sb; - if (!sb) - goto out; + mutex_lock(&bdev->bd_holder_lock); + if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) { + error = bdev->bd_holder_ops->thaw(bdev); + lockdep_assert_not_held(&bdev->bd_holder_lock); + } else { + mutex_unlock(&bdev->bd_holder_lock); + } - if (sb->s_op->thaw_super) - error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE); - else - error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); if (error) - bdev->bd_fsfreeze_count++; - else - bdev->bd_fsfreeze_sb = NULL; + atomic_inc(&bdev->bd_fsfreeze_count); out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } -EXPORT_SYMBOL(thaw_bdev); +EXPORT_SYMBOL(bdev_thaw); /* * pseudo-fs @@ -425,6 +431,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) void bdev_add(struct block_device *bdev, dev_t dev) { + if (bdev_stable_writes(bdev)) + mapping_set_stable_writes(bdev->bd_inode->i_mapping); bdev->bd_dev = dev; bdev->bd_inode->i_rdev = dev; bdev->bd_inode->i_ino = dev; @@ -727,9 +735,60 @@ void blkdev_put_no_open(struct block_device *bdev) { put_device(&bdev->bd_device); } - + +static bool bdev_writes_blocked(struct block_device *bdev) +{ + return bdev->bd_writers == -1; +} + +static void bdev_block_writes(struct block_device *bdev) +{ + bdev->bd_writers = -1; +} + +static void bdev_unblock_writes(struct block_device *bdev) +{ + bdev->bd_writers = 0; +} + +static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return true; + /* Writes blocked? */ + if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev)) + return false; + if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0) + return false; + return true; +} + +static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return; + + /* Claim exclusive or shared write access. */ + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_block_writes(bdev); + else if (mode & BLK_OPEN_WRITE) + bdev->bd_writers++; +} + +static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return; + + /* Yield exclusive or shared write access. */ + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_unblock_writes(bdev); + else if (mode & BLK_OPEN_WRITE) + bdev->bd_writers--; +} + /** - * blkdev_get_by_dev - open a block device by device number + * bdev_open_by_dev - open a block device by device number * @dev: device number of block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier @@ -741,32 +800,46 @@ void blkdev_put_no_open(struct block_device *bdev) * * Use this interface ONLY if you really do not have anything better - i.e. when * you are behind a truly sucky interface and all you are given is a device - * number. Everything else should use blkdev_get_by_path(). + * number. Everything else should use bdev_open_by_path(). * * CONTEXT: * Might sleep. * * RETURNS: - * Reference to the block_device on success, ERR_PTR(-errno) on failure. + * Handle with a reference to the block_device on success, ERR_PTR(-errno) on + * failure. */ -struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) +struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops) { - bool unblock_events = true; + struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle), + GFP_KERNEL); struct block_device *bdev; + bool unblock_events = true; struct gendisk *disk; int ret; + if (!handle) + return ERR_PTR(-ENOMEM); + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, MAJOR(dev), MINOR(dev), ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0)); if (ret) - return ERR_PTR(ret); + goto free_handle; + + /* Blocking writes requires exclusive opener */ + if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) { + ret = -EINVAL; + goto free_handle; + } bdev = blkdev_get_no_open(dev); - if (!bdev) - return ERR_PTR(-ENXIO); + if (!bdev) { + ret = -ENXIO; + goto free_handle; + } disk = bdev->bd_disk; if (holder) { @@ -789,12 +862,16 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, goto abort_claiming; if (!try_module_get(disk->fops->owner)) goto abort_claiming; + ret = -EBUSY; + if (!bdev_may_open(bdev, mode)) + goto abort_claiming; if (bdev_is_partition(bdev)) ret = blkdev_get_part(bdev, mode); else ret = blkdev_get_whole(bdev, mode); if (ret) goto put_module; + bdev_claim_write_access(bdev, mode); if (holder) { bd_finish_claiming(bdev, holder, hops); @@ -815,7 +892,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (unblock_events) disk_unblock_events(disk); - return bdev; + handle->bdev = bdev; + handle->holder = holder; + handle->mode = mode; + return handle; put_module: module_put(disk->fops->owner); abort_claiming: @@ -825,34 +905,14 @@ abort_claiming: disk_unblock_events(disk); put_blkdev: blkdev_put_no_open(bdev); +free_handle: + kfree(handle); return ERR_PTR(ret); } -EXPORT_SYMBOL(blkdev_get_by_dev); - -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) -{ - struct bdev_handle *handle = kmalloc(sizeof(*handle), GFP_KERNEL); - struct block_device *bdev; - - if (!handle) - return ERR_PTR(-ENOMEM); - bdev = blkdev_get_by_dev(dev, mode, holder, hops); - if (IS_ERR(bdev)) { - kfree(handle); - return ERR_CAST(bdev); - } - handle->bdev = bdev; - handle->holder = holder; - if (holder) - mode |= BLK_OPEN_EXCL; - handle->mode = mode; - return handle; -} EXPORT_SYMBOL(bdev_open_by_dev); /** - * blkdev_get_by_path - open a block device by name + * bdev_open_by_path - open a block device by name * @path: path to the block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier @@ -866,29 +926,9 @@ EXPORT_SYMBOL(bdev_open_by_dev); * Might sleep. * * RETURNS: - * Reference to the block_device on success, ERR_PTR(-errno) on failure. + * Handle with a reference to the block_device on success, ERR_PTR(-errno) on + * failure. */ -struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops) -{ - struct block_device *bdev; - dev_t dev; - int error; - - error = lookup_bdev(path, &dev); - if (error) - return ERR_PTR(error); - - bdev = blkdev_get_by_dev(dev, mode, holder, hops); - if (!IS_ERR(bdev) && (mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, holder); - return ERR_PTR(-EACCES); - } - - return bdev; -} -EXPORT_SYMBOL(blkdev_get_by_path); - struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { @@ -911,8 +951,9 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(bdev_open_by_path); -void blkdev_put(struct block_device *bdev, void *holder) +void bdev_release(struct bdev_handle *handle) { + struct block_device *bdev = handle->bdev; struct gendisk *disk = bdev->bd_disk; /* @@ -926,8 +967,10 @@ void blkdev_put(struct block_device *bdev, void *holder) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); - if (holder) - bd_end_claim(bdev, holder); + bdev_yield_write_access(bdev, handle->mode); + + if (handle->holder) + bd_end_claim(bdev, handle->holder); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE @@ -944,12 +987,6 @@ void blkdev_put(struct block_device *bdev, void *holder) module_put(disk->fops->owner); blkdev_put_no_open(bdev); -} -EXPORT_SYMBOL(blkdev_put); - -void bdev_release(struct bdev_handle *handle) -{ - blkdev_put(handle->bdev, handle->holder); kfree(handle); } EXPORT_SYMBOL(bdev_release); @@ -1100,3 +1137,12 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) blkdev_put_no_open(bdev); } + +static int __init setup_bdev_allow_write_mounted(char *str) +{ + if (kstrtobool(str, &bdev_allow_write_mounted)) + pr_warn("Invalid option string for bdev_allow_write_mounted:" + " '%s'\n", str); + return 1; +} +__setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4a42ea2972ad..4b48c2c44098 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -577,6 +577,7 @@ static void blkg_destroy_all(struct gendisk *disk) struct request_queue *q = disk->queue; struct blkcg_gq *blkg, *n; int count = BLKG_DESTROY_BATCH_SIZE; + int i; restart: spin_lock_irq(&q->queue_lock); @@ -602,6 +603,18 @@ restart: } } + /* + * Mark policy deactivated since policy offline has been done, and + * the free is scheduled, so future blkcg_deactivate_policy() can + * be bypassed + */ + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (pol) + __clear_bit(pol->plid, q->blkcg_pols); + } + q->root_blkg = NULL; spin_unlock_irq(&q->queue_lock); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 624c03c8fe64..fd482439afbc 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -249,8 +249,6 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, { struct blkcg_gq *blkg; - WARN_ON_ONCE(!rcu_read_lock_held()); - if (blkcg == &blkcg_root) return q->root_blkg; diff --git a/block/blk-core.c b/block/blk-core.c index fdf25b8d6e78..2eca76ccf4ee 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -501,9 +501,17 @@ static inline void bio_check_ro(struct bio *bio) if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return; - pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", - bio->bi_bdev); - /* Older lvm-tools actually trigger this */ + + if (bio->bi_bdev->bd_ro_warned) + return; + + bio->bi_bdev->bd_ro_warned = true; + /* + * Use ioctl to set underlying disk of raid/dm to read-only + * will trigger this. + */ + pr_warn("Trying to write to read-only block-device %pg\n", + bio->bi_bdev); } } diff --git a/block/blk-mq.c b/block/blk-mq.c index e2d11183f62e..ac18f802c027 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1512,14 +1512,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); +static bool blk_is_flush_data_rq(struct request *rq) +{ + return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); +} + static bool blk_mq_rq_inflight(struct request *rq, void *priv) { /* * If we find a request that isn't idle we know the queue is busy * as it's checked in the iter. * Return false to stop the iteration. + * + * In case of queue quiesce, if one flush data request is completed, + * don't count it as inflight given the flush sequence is suspended, + * and the original flush data request is invisible to driver, just + * like other pending requests because of quiesce */ - if (blk_mq_request_started(rq)) { + if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && + blk_is_flush_data_rq(rq) && + blk_mq_request_completed(rq))) { bool *busy = priv; *busy = true; @@ -2858,11 +2870,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, }; struct request *rq; - if (unlikely(bio_queue_enter(bio))) - return NULL; - if (blk_mq_attempt_bio_merge(q, bio, nsegs)) - goto queue_exit; + return NULL; rq_qos_throttle(q, bio); @@ -2878,35 +2887,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, rq_qos_cleanup(q, bio); if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); -queue_exit: - blk_queue_exit(q); return NULL; } -static inline struct request *blk_mq_get_cached_request(struct request_queue *q, - struct blk_plug *plug, struct bio **bio, unsigned int nsegs) +/* return true if this @rq can be used for @bio */ +static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug, + struct bio *bio) { - struct request *rq; - enum hctx_type type, hctx_type; + enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf); + enum hctx_type hctx_type = rq->mq_hctx->type; - if (!plug) - return NULL; - rq = rq_list_peek(&plug->cached_rq); - if (!rq || rq->q != q) - return NULL; - - if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) { - *bio = NULL; - return NULL; - } + WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq); - type = blk_mq_get_hctx_type((*bio)->bi_opf); - hctx_type = rq->mq_hctx->type; if (type != hctx_type && !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT)) - return NULL; - if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) - return NULL; + return false; + if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf)) + return false; /* * If any qos ->throttle() end up blocking, we will have flushed the @@ -2914,12 +2911,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, * before we throttle. */ plug->cached_rq = rq_list_next(rq); - rq_qos_throttle(q, *bio); + rq_qos_throttle(rq->q, bio); blk_mq_rq_time_init(rq, 0); - rq->cmd_flags = (*bio)->bi_opf; + rq->cmd_flags = bio->bi_opf; INIT_LIST_HEAD(&rq->queuelist); - return rq; + return true; } static void bio_set_ioprio(struct bio *bio) @@ -2949,7 +2946,7 @@ void blk_mq_submit_bio(struct bio *bio) struct blk_plug *plug = blk_mq_plug(bio); const int is_sync = op_is_sync(bio->bi_opf); struct blk_mq_hw_ctx *hctx; - struct request *rq; + struct request *rq = NULL; unsigned int nr_segs = 1; blk_status_t ret; @@ -2960,20 +2957,36 @@ void blk_mq_submit_bio(struct bio *bio) return; } - if (!bio_integrity_prep(bio)) - return; - bio_set_ioprio(bio); - rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs); - if (!rq) { - if (!bio) + if (plug) { + rq = rq_list_peek(&plug->cached_rq); + if (rq && rq->q != q) + rq = NULL; + } + if (rq) { + if (!bio_integrity_prep(bio)) return; - rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); - if (unlikely(!rq)) + if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) return; + if (blk_mq_can_use_cached_rq(rq, plug, bio)) + goto done; + percpu_ref_get(&q->q_usage_counter); + } else { + if (unlikely(bio_queue_enter(bio))) + return; + if (!bio_integrity_prep(bio)) + goto fail; } + rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); + if (unlikely(!rq)) { +fail: + blk_queue_exit(q); + return; + } + +done: trace_block_getrq(bio); rq_qos_track(q, rq, bio); diff --git a/block/blk-pm.c b/block/blk-pm.c index 6b72b2e03fc8..42e842074715 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -163,39 +163,16 @@ EXPORT_SYMBOL(blk_pre_runtime_resume); * @q: the queue of the device * * Description: - * For historical reasons, this routine merely calls blk_set_runtime_active() - * to do the real work of restarting the queue. It does this regardless of - * whether the device's runtime-resume succeeded; even if it failed the + * Restart the queue of a runtime suspended device. It does this regardless + * of whether the device's runtime-resume succeeded; even if it failed the * driver or error handler will need to communicate with the device. * * This function should be called near the end of the device's - * runtime_resume callback. + * runtime_resume callback to correct queue runtime PM status and re-enable + * peeking requests from the queue. */ void blk_post_runtime_resume(struct request_queue *q) { - blk_set_runtime_active(q); -} -EXPORT_SYMBOL(blk_post_runtime_resume); - -/** - * blk_set_runtime_active - Force runtime status of the queue to be active - * @q: the queue of the device - * - * If the device is left runtime suspended during system suspend the resume - * hook typically resumes the device and corrects runtime status - * accordingly. However, that does not affect the queue runtime PM status - * which is still "suspended". This prevents processing requests from the - * queue. - * - * This function can be used in driver's resume hook to correct queue - * runtime PM status and re-enable peeking requests from the queue. It - * should be called before first request is added to the queue. - * - * This function is also called by blk_post_runtime_resume() for - * runtime resumes. It does everything necessary to restart the queue. - */ -void blk_set_runtime_active(struct request_queue *q) -{ int old_status; if (!q->dev) @@ -211,4 +188,4 @@ void blk_set_runtime_active(struct request_queue *q) if (old_status != RPM_ACTIVE) blk_clear_pm_only(q); } -EXPORT_SYMBOL(blk_set_runtime_active); +EXPORT_SYMBOL(blk_post_runtime_resume); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 63e481262336..0b2d04766324 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -615,6 +615,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); #endif +/* Common attributes for bio-based and request-based queues. */ static struct attribute *queue_attrs[] = { &queue_ra_entry.attr, &queue_max_hw_sectors_entry.attr, @@ -659,6 +660,7 @@ static struct attribute *queue_attrs[] = { NULL, }; +/* Request-based queue attributes that are not relevant for bio-based queues. */ static struct attribute *blk_mq_queue_attrs[] = { &queue_requests_entry.attr, &elv_iosched_entry.attr, diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 13e4377a8b28..16f5766620a4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1320,6 +1320,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE), tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE)); + rcu_read_lock(); /* * Update has_rules[] flags for the updated tg's subtree. A tg is * considered to have rules if either the tg itself or any of its @@ -1347,6 +1348,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) this_tg->latency_target = max(this_tg->latency_target, parent_tg->latency_target); } + rcu_read_unlock(); /* * We're already holding queue_lock and know @tg is valid. Let's |