From 37e58237a16b94fcd2c2d1b7e9c6e1ca661c231b Mon Sep 17 00:00:00 2001
From: Ming Lin
Date: Tue, 22 Mar 2016 00:24:44 -0700
Subject: block: add offset in blk_add_request_payload()

We could kmalloc() the payload, so we need the offset within the page.

Signed-off-by: Ming Lin
Reviewed-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 669e419d6234..bbaa76757018 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -779,7 +779,7 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *,
 extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
-		unsigned int len);
+		int offset, unsigned int len);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     struct bio_set *bs, gfp_t gfp_mask,
-- cgit
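For illustration, a caller of the extended interface might look like the
minimal, hypothetical driver-side sketch below (the function name and length
are assumptions, not part of the patch). The point of the new argument is that
a kmalloc()ed payload no longer has to start on a page boundary:

	#include <linux/blkdev.h>
	#include <linux/mm.h>
	#include <linux/slab.h>

	/* Hypothetical example: attach a kmalloc()ed payload to a request. */
	static int example_add_payload(struct request *rq, unsigned int len)
	{
		void *buf = kmalloc(len, GFP_ATOMIC);

		if (!buf)
			return -ENOMEM;

		/* The buffer may sit anywhere inside its page, hence the offset. */
		blk_add_request_payload(rq, virt_to_page(buf),
					offset_in_page(buf), len);
		return 0;
	}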
From e0489487ec9cd79ee1fa0dc5d3789c08b0e51a2c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg
Date: Thu, 10 Mar 2016 13:58:46 +0200
Subject: blk-mq: Export tagset iter function

It's useful to iterate over all the active tags in cases where we will
need to fail all the queues' IO.

Signed-off-by: Sagi Grimberg
[hch: carefully check for valid tagsets]
Reviewed-by: Christoph Hellwig
Reviewed-by: Johannes Thumshirn
Signed-off-by: Jens Axboe
---
 block/blk-mq-tag.c     | 12 ++++++++++++
 include/linux/blk-mq.h |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index abdbb47405cb..2fd04286f103 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -474,6 +474,18 @@ void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
 }
 EXPORT_SYMBOL(blk_mq_all_tag_busy_iter);
 
+void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
+		busy_tag_iter_fn *fn, void *priv)
+{
+	int i;
+
+	for (i = 0; i < tagset->nr_hw_queues; i++) {
+		if (tagset->tags && tagset->tags[i])
+			blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
+	}
+}
+EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
+
 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		void *priv)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 9ac9799b702b..c808fec1ce44 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -240,6 +240,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
 		void *priv);
+void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
+		busy_tag_iter_fn *fn, void *priv);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_mq_freeze_queue_start(struct request_queue *q);
-- cgit
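The typical consumer is a driver that must fail every outstanding request when
its controller dies. The sketch below is hypothetical (the callback and
function names are assumed, not from the patch); it relies only on the
busy_tag_iter_fn signature and the two-argument blk_mq_complete_request() of
this kernel era:

	/* Hypothetical example: complete every in-flight request with an error. */
	static void example_cancel_request(struct request *req, void *data,
					   bool reserved)
	{
		blk_mq_complete_request(req, -EIO);
	}

	static void example_fail_all_io(struct blk_mq_tag_set *set)
	{
		/* Walks every hardware queue's busy tags in the tag set. */
		blk_mq_tagset_busy_iter(set, example_cancel_request, NULL);
	}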
From 93e9d8e836cb1a9a58b33eb6643bf061c6119ef2 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 12 Apr 2016 12:32:46 -0600
Subject: block: add ability to flag write back caching on a device

Add an internal helper and flag for setting whether a queue has write
back caching, or write through (or none). Add a sysfs file to show this
as well, and make it changeable from user space.

This will replace the (awkward) blk_queue_flush() interface that
drivers currently use to inform the block layer of write cache state
and capabilities.

Signed-off-by: Jens Axboe
Reviewed-by: Christoph Hellwig
---
 Documentation/block/queue-sysfs.txt |  9 +++++++++
 block/blk-settings.c                | 26 +++++++++++++++++++++++++
 block/blk-sysfs.c                   | 39 +++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h              |  3 +++
 4 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index e5d914845be6..dce25d848d92 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -141,6 +141,15 @@ control of this block device to that new IO scheduler. Note that writing
 an IO scheduler name to this file will attempt to load that IO scheduler
 module, if it isn't already present in the system.
 
+write_cache (RW)
+----------------
+When read, this file will display whether the device has write back
+caching enabled or not. It will return "write back" for the former
+case, and "write through" for the latter. Writing to this file can
+change the kernel's view of the device, but it doesn't alter the
+device state. This means that it might not be safe to toggle the
+setting from "write back" to "write through", since that will also
+eliminate cache flushes issued by the kernel.
 
 Jens Axboe, February 2009
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 331e4eee0dda..c903bee43cf8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -846,6 +846,32 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 
+/**
+ * blk_queue_write_cache - configure queue's write cache
+ * @q:		the request queue for the device
+ * @wc:		write back cache on or off
+ * @fua:	device supports FUA writes, if true
+ *
+ * Tell the block layer about the write cache of @q.
+ */
+void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
+{
+	spin_lock_irq(q->queue_lock);
+	if (wc) {
+		queue_flag_set(QUEUE_FLAG_WC, q);
+		q->flush_flags = REQ_FLUSH;
+	} else
+		queue_flag_clear(QUEUE_FLAG_WC, q);
+	if (fua) {
+		if (wc)
+			q->flush_flags |= REQ_FUA;
+		queue_flag_set(QUEUE_FLAG_FUA, q);
+	} else
+		queue_flag_clear(QUEUE_FLAG_FUA, q);
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(blk_queue_write_cache);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 995b58d46ed1..99205965f559 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -347,6 +347,38 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 	return ret;
 }
 
+static ssize_t queue_wc_show(struct request_queue *q, char *page)
+{
+	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+		return sprintf(page, "write back\n");
+
+	return sprintf(page, "write through\n");
+}
+
+static ssize_t queue_wc_store(struct request_queue *q, const char *page,
+			      size_t count)
+{
+	int set = -1;
+
+	if (!strncmp(page, "write back", 10))
+		set = 1;
+	else if (!strncmp(page, "write through", 13) ||
+		 !strncmp(page, "none", 4))
+		set = 0;
+
+	if (set == -1)
+		return -EINVAL;
+
+	spin_lock_irq(q->queue_lock);
+	if (set)
+		queue_flag_set(QUEUE_FLAG_WC, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_WC, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return count;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -478,6 +510,12 @@ static struct queue_sysfs_entry queue_poll_entry = {
 	.store = queue_poll_store,
 };
 
+static struct queue_sysfs_entry queue_wc_entry = {
+	.attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_wc_show,
+	.store = queue_wc_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -503,6 +541,7 @@ static struct attribute *default_attrs[] = {
 	&queue_iostats_entry.attr,
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
+	&queue_wc_entry.attr,
 	NULL,
 };
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bbaa76757018..ba72687c5654 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -491,6 +491,8 @@ struct request_queue {
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 #define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
 #define QUEUE_FLAG_POLL        22	/* IO polling enabled if set */
+#define QUEUE_FLAG_WC	       23	/* Write back caching */
+#define QUEUE_FLAG_FUA	       24	/* device supports FUA writes */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -1009,6 +1011,7 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
 extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
+extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_rq_map_sg(struct request_queue *, struct request *,
			 struct scatterlist *);
-- cgit
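As a usage sketch (hypothetical driver code, not part of the patch), a driver
with a volatile write back cache and FUA support would now advertise both in a
single call instead of building a REQ_FLUSH/REQ_FUA mask for
blk_queue_flush(); the result then appears in
/sys/block/<dev>/queue/write_cache as "write back":

	/* Hypothetical example: declare a write back cache with FUA support. */
	static void example_configure_cache(struct request_queue *q)
	{
		/* Sets QUEUE_FLAG_WC/QUEUE_FLAG_FUA and q->flush_flags. */
		blk_queue_write_cache(q, true, true);
	}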
From 1fcbcc333f1fae6e11cc0839a6e72bc1a3e830bf Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Mon, 25 Apr 2016 16:50:14 -0700
Subject: block: copy NOMERGE flag from bio to request

A bio might have the NOMERGE flag set; blk_queue_split(), for example,
sets it. When we initiate a request, copy this flag too.

Signed-off-by: Shaohua Li
Signed-off-by: Jens Axboe
---
 include/linux/blk_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 86a38ea1823f..77e5d81f07aa 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -208,7 +208,7 @@ enum rq_flag_bits {
 #define REQ_COMMON_MASK \
 	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
 	 REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
-	 REQ_SECURE | REQ_INTEGRITY)
+	 REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE)
 #define REQ_CLONE_MASK		REQ_COMMON_MASK
 
 #define BIO_NO_ADVANCE_ITER_MASK	(REQ_DISCARD|REQ_WRITE_SAME)
-- cgit
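This one-line change works because request flags are seeded from the bio
through REQ_COMMON_MASK when a request is set up. A sketch of that propagation
(along the lines of init_request_from_bio() in blk-core.c; treat the exact
surrounding code as an assumption):

	/* Flags covered by REQ_COMMON_MASK follow the bio into the request,
	 * so REQ_NOMERGE now survives the bio -> request transition. */
	req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;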
From 38f252553300ee1d3346a5273e95fe1dd60ca50a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sat, 16 Apr 2016 14:55:28 -0400
Subject: block: add __blkdev_issue_discard

This is a version of blkdev_issue_discard which doesn't wait for the
I/O to complete, but instead allows the caller to submit the final bio
and/or chain it to others.

Signed-off-by: Christoph Hellwig
Signed-off-by: Ming Lin
Signed-off-by: Sagi Grimberg
Reviewed-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-lib.c        | 63 +++++++++++++++++++++++++++++---------------------
 include/linux/blkdev.h |  2 ++
 2 files changed, 39 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 700d248cbde5..ccbce2b2ea05 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -22,45 +22,25 @@ static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages,
 	return new;
 }
 
-/**
- * blkdev_issue_discard - queue a discard
- * @bdev:	blockdev to issue discard for
- * @sector:	start sector
- * @nr_sects:	number of sectors to discard
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	BLKDEV_IFL_* flags to control behaviour
- *
- * Description:
- *    Issue a discard request for the sectors in question.
- */
-int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	int type = REQ_WRITE | REQ_DISCARD;
+	struct bio *bio = *biop;
 	unsigned int granularity;
 	int alignment;
-	struct bio *bio = NULL;
-	int ret = 0;
-	struct blk_plug plug;
 
 	if (!q)
 		return -ENXIO;
-
 	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
+	if ((type & REQ_SECURE) && !blk_queue_secdiscard(q))
+		return -EOPNOTSUPP;
 
 	/* Zero-sector (unknown) and one-sector granularities are the same. */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
 	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
 
-	if (flags & BLKDEV_DISCARD_SECURE) {
-		if (!blk_queue_secdiscard(q))
-			return -EOPNOTSUPP;
-		type |= REQ_SECURE;
-	}
-
-	blk_start_plug(&plug);
 	while (nr_sects) {
 		unsigned int req_sects;
 		sector_t end_sect, tmp;
@@ -98,7 +78,38 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		 */
 		cond_resched();
 	}
-	if (bio)
+
+	*biop = bio;
+	return 0;
+}
+EXPORT_SYMBOL(__blkdev_issue_discard);
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:	blockdev to issue discard for
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to discard
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @flags:	BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ *    Issue a discard request for the sectors in question.
+ */
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+	int type = REQ_WRITE | REQ_DISCARD;
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret;
+
+	if (flags & BLKDEV_DISCARD_SECURE)
+		type |= REQ_SECURE;
+
+	blk_start_plug(&plug);
+	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, type,
+			&bio);
+	if (!ret && bio)
 		ret = submit_bio_wait(type, bio);
 	blk_finish_plug(&plug);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba72687c5654..b79131acf6c0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1131,6 +1131,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
+extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop);
 extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct page *page);
 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-- cgit
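For illustration, a hypothetical caller that wants the discard to complete
asynchronously can chain the returned bio to one it already owns instead of
calling submit_bio_wait(). The names below are assumptions, and the
two-argument submit_bio() matches this kernel era:

	/* Hypothetical example: issue a discard without blocking, folding
	 * its completion into @parent via bio_chain(). */
	static int example_discard_async(struct block_device *bdev,
					 sector_t sector, sector_t nr_sects,
					 struct bio *parent)
	{
		int type = REQ_WRITE | REQ_DISCARD;
		struct bio *bio = NULL;
		int ret;

		ret = __blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
				type, &bio);
		if (!ret && bio) {
			bio_chain(bio, parent);	/* parent completes after bio */
			submit_bio(type, bio);
		}
		return ret;
	}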
From 0ef5a50c1658d4d96a44f145bcb92ff3310c75b1 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Thu, 5 May 2016 11:54:22 -0400
Subject: block: make bio_inc_remaining() interface accessible again

Commit 326e1dbb57 ("block: remove management of bi_remaining when
restoring original bi_end_io") made bio_inc_remaining() private to
bio.c because the only use-case that made sense was confined to the
bio_chain() interface.

Since that time DM thinp went on to use bio_chain() in its relatively
complex implementation of async discard support. That implementation,
even when converted over to use the new async __blkdev_issue_discard()
interface, depends on deferred completion of the original discard bio --
which is most appropriately implemented using bio_inc_remaining().

DM thinp foolishly duplicated bio_inc_remaining(), local to dm-thin.c
as __bio_inc_remaining(), so re-exporting bio_inc_remaining() allows us
to put an end to that foolishness.

All said, bio_inc_remaining() should really only be used in conjunction
with bio_chain(). It isn't intended for generic bio reference counting.

Signed-off-by: Mike Snitzer
Acked-by: Joe Thornber
Signed-off-by: Jens Axboe
---
 block/bio.c         | 11 -----------
 include/linux/bio.h | 11 +++++++++++
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 807d25e466ec..0e4aa42bc30d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -311,17 +311,6 @@ static void bio_chain_endio(struct bio *bio)
 	bio_endio(__bio_chain_endio(bio));
 }
 
-/*
- * Increment chain count for the bio. Make sure the CHAIN flag update
- * is visible before the raised count.
- */
-static inline void bio_inc_remaining(struct bio *bio)
-{
-	bio_set_flag(bio, BIO_CHAIN);
-	smp_mb__before_atomic();
-	atomic_inc(&bio->__bi_remaining);
-}
-
 /**
  * bio_chain - chain bio completions
  * @bio: the target bio
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6b7481f62218..9faebf7f9a33 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -702,6 +702,17 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
 	return bio;
 }
 
+/*
+ * Increment chain count for the bio. Make sure the CHAIN flag update
+ * is visible before the raised count.
+ */
+static inline void bio_inc_remaining(struct bio *bio)
+{
+	bio_set_flag(bio, BIO_CHAIN);
+	smp_mb__before_atomic();
+	atomic_inc(&bio->__bi_remaining);
+}
+
 /*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
-- cgit
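To make the intended pairing concrete, here is a hypothetical sketch of the
deferred-completion pattern the message describes (DM thinp's real code is
more involved; the function names and worker structure are assumptions):

	/* Hypothetical example: keep @bio alive across deferred work. Each
	 * bio_endio() drops one count from __bi_remaining, so the bio only
	 * completes once the deferred worker also calls bio_endio(). */
	static void example_defer_completion(struct bio *bio,
					     struct work_struct *work)
	{
		bio_inc_remaining(bio);	/* extra completion reference */
		schedule_work(work);	/* worker must end with bio_endio(bio) */
	}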