Diffstat (limited to 'include/linux/blkdev.h')
 -rw-r--r--  include/linux/blkdev.h | 295
 1 file changed, 198 insertions(+), 97 deletions(-)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0ce8a372d506..9af3e0f430bc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,6 +27,8 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
+#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -47,7 +49,7 @@ struct blk_stat_callback;
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
 
-/* Must be consisitent with blk_mq_poll_stats_bkt() */
+/* Must be consistent with blk_mq_poll_stats_bkt() */
 #define BLK_MQ_POLL_STATS_BKTS 16
 
 /*
@@ -121,6 +123,12 @@ typedef __u32 __bitwise req_flags_t;
 /* Look at ->special_vec for the actual data payload instead of the bio chain. */
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+/* The per-zone write lock is held for this request */
+#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
+/* timeout is expired */
+#define RQF_MQ_TIMEOUT_EXPIRED	((__force req_flags_t)(1 << 20))
+/* already slept for hybrid poll */
+#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 21))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -133,12 +141,6 @@ typedef __u32 __bitwise req_flags_t;
  * especially blk_mq_rq_ctx_init() to take care of the added fields.
  */
 struct request {
-	struct list_head queuelist;
-	union {
-		struct __call_single_data csd;
-		u64 fifo_time;
-	};
-
 	struct request_queue *q;
 	struct blk_mq_ctx *mq_ctx;
 
@@ -148,8 +150,6 @@ struct request {
 
 	int internal_tag;
 
-	unsigned long atomic_flags;
-
 	/* the following two fields are internal, NEVER access directly */
 	unsigned int __data_len;	/* total data len */
 	int tag;
@@ -158,6 +158,8 @@ struct request {
 	struct bio *bio;
 	struct bio *biotail;
 
+	struct list_head queuelist;
+
 	/*
 	 * The hash is used inside the scheduler, and killed once the
 	 * request reaches the dispatch list. The ipi_list is only used
@@ -205,19 +207,16 @@ struct request {
 	struct hd_struct *part;
 	unsigned long start_time;
 	struct blk_issue_stat issue_stat;
-#ifdef CONFIG_BLK_CGROUP
-	struct request_list *rl;		/* rl this rq is alloced from */
-	unsigned long long start_time_ns;
-	unsigned long long io_start_time_ns;    /* when passed to hardware */
-#endif
 	/* Number of scatter-gather DMA addr+len pairs after
 	 * physical address coalescing is performed.
 	 */
 	unsigned short nr_phys_segments;
+
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	unsigned short nr_integrity_segments;
 #endif
+
+	unsigned short write_hint;
 	unsigned short ioprio;
 
 	unsigned int timeout;
@@ -226,11 +225,37 @@ struct request {
 
 	unsigned int extra_len;	/* length of alignment and padding */
 
-	unsigned short write_hint;
+	/*
+	 * On blk-mq, the lower bits of ->gstate (generation number and
+	 * state) carry the MQ_RQ_* state value and the upper bits the
+	 * generation number which is monotonically incremented and used to
+	 * distinguish the reuse instances.
+	 *
+	 * ->gstate_seq allows updates to ->gstate and other fields
+	 * (currently ->deadline) during request start to be read
+	 * atomically from the timeout path, so that it can operate on a
+	 * coherent set of information.
+	 */
+	seqcount_t gstate_seq;
+	u64 gstate;
+
+	/*
+	 * ->aborted_gstate is used by the timeout to claim a specific
+	 * recycle instance of this request. See blk_mq_timeout_work().
+	 */
+	struct u64_stats_sync aborted_gstate_sync;
+	u64 aborted_gstate;
+
+	/* access through blk_rq_set_deadline, blk_rq_deadline */
+	unsigned long __deadline;
 
-	unsigned long deadline;
 	struct list_head timeout_list;
 
+	union {
+		struct __call_single_data csd;
+		u64 fifo_time;
+	};
+
 	/*
 	 * completion callback.
 	 */
@@ -239,6 +264,12 @@ struct request {
 
 	/* for bidi */
 	struct request *next_rq;
+
+#ifdef CONFIG_BLK_CGROUP
+	struct request_list *rl;		/* rl this rq is alloced from */
+	unsigned long long start_time_ns;
+	unsigned long long io_start_time_ns;    /* when passed to hardware */
+#endif
 };
 
 static inline bool blk_op_is_scsi(unsigned int op)
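The comment above describes a lock-free generation scheme: the state occupies only the low bits of ->gstate, so bumping the generation on every reuse lets the timeout path detect that its claim on a request went stale. A minimal userspace model of that packing (the MQ_RQ_* names and the 2-bit state width mirror block/blk-mq.h of this period, but treat the constants here as illustrative, not as the kernel implementation):

	#include <stdint.h>
	#include <stdio.h>

	#define MQ_RQ_STATE_BITS	2
	#define MQ_RQ_STATE_MASK	((1ULL << MQ_RQ_STATE_BITS) - 1)
	#define MQ_RQ_GEN_INC		(1ULL << MQ_RQ_STATE_BITS)

	enum mq_rq_state {
		MQ_RQ_IDLE	= 0,
		MQ_RQ_IN_FLIGHT	= 1,
		MQ_RQ_COMPLETE	= 2,
	};

	static uint64_t gstate_set_state(uint64_t gstate, enum mq_rq_state state)
	{
		/* the low MQ_RQ_STATE_BITS carry the state value */
		return (gstate & ~MQ_RQ_STATE_MASK) | state;
	}

	static uint64_t gstate_start(uint64_t gstate)
	{
		/* a new in-flight instance: bump the generation, set the state */
		return gstate_set_state(gstate + MQ_RQ_GEN_INC, MQ_RQ_IN_FLIGHT);
	}

	int main(void)
	{
		uint64_t gstate = 0;

		gstate = gstate_start(gstate);		/* gen 1, in flight */
		uint64_t aborted = gstate;		/* timeout claims this instance */
		gstate = gstate_set_state(gstate, MQ_RQ_IDLE);
		gstate = gstate_start(gstate);		/* gen 2: recycled request */

		/* the stale claim no longer matches the live instance */
		printf("timeout claim %s\n",
		       aborted == gstate ? "matches" : "is stale");
		return 0;
	}

In the kernel, ->gstate_seq additionally makes the ->gstate/->__deadline pair readable as one coherent snapshot from the timeout path.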
@@ -564,6 +595,22 @@ struct request_queue {
 	struct queue_limits limits;
 
 	/*
+	 * Zoned block device information for request dispatch control.
+	 * nr_zones is the total number of zones of the device. This is always
+	 * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
+	 * bits which indicates if a zone is conventional (bit clear) or
+	 * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
+	 * bits which indicates if a zone is write locked, that is, if a write
+	 * request targeting the zone was dispatched. All three fields are
+	 * initialized by the low level device driver (e.g. scsi/sd.c).
+	 * Stacking drivers (device mappers) may or may not initialize
+	 * these fields.
+	 */
+	unsigned int		nr_zones;
+	unsigned long		*seq_zones_bitmap;
+	unsigned long		*seq_zones_wlock;
+
+	/*
 	 * sg stuff
 	 */
 	unsigned int		sg_timeout;
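These three fields are all the dispatch path needs to reason about zones. Because the zone size (chunk_sectors) is a power of two, mapping a sector to its zone reduces to a shift, and classifying the zone to a bitmap test, as the helpers added further down in this patch do. A self-contained sketch of that arithmetic, with made-up zone geometry (the division stands in for the kernel's ilog2() shift):

	#include <stdbool.h>
	#include <stdio.h>

	#define ZONE_SECTORS	(1UL << 16)	/* 32 MB zones of 512 B sectors */
	#define NR_ZONES	8

	static unsigned long seq_zones_bitmap;	/* bit set: sequential zone */

	static unsigned int zone_no(unsigned long long sector)
	{
		return sector / ZONE_SECTORS;	/* power of two: a shift */
	}

	static bool zone_is_seq(unsigned long long sector)
	{
		return seq_zones_bitmap & (1UL << zone_no(sector));
	}

	int main(void)
	{
		/* zones 0-1 conventional, 2-7 sequential-write-required */
		for (unsigned int z = 2; z < NR_ZONES; z++)
			seq_zones_bitmap |= 1UL << z;

		unsigned long long sector = 3ULL * ZONE_SECTORS + 128;
		printf("sector %llu: zone %u, %s\n", sector, zone_no(sector),
		       zone_is_seq(sector) ? "sequential" : "conventional");
		return 0;
	}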
@@ -660,73 +707,10 @@ struct request_queue {
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_POLL))
 
-/*
- * @q->queue_lock is set while a queue is being initialized. Since we know
- * that no other threads access the queue object before @q->queue_lock has
- * been set, it is safe to manipulate queue flags without holding the
- * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
- * blk_init_allocated_queue().
- */
-static inline void queue_lockdep_assert_held(struct request_queue *q)
-{
-	if (q->queue_lock)
-		lockdep_assert_held(q->queue_lock);
-}
-
-static inline void queue_flag_set_unlocked(unsigned int flag,
-					   struct request_queue *q)
-{
-	__set_bit(flag, &q->queue_flags);
-}
-
-static inline int queue_flag_test_and_clear(unsigned int flag,
-					    struct request_queue *q)
-{
-	queue_lockdep_assert_held(q);
-
-	if (test_bit(flag, &q->queue_flags)) {
-		__clear_bit(flag, &q->queue_flags);
-		return 1;
-	}
-
-	return 0;
-}
-
-static inline int queue_flag_test_and_set(unsigned int flag,
-					  struct request_queue *q)
-{
-	queue_lockdep_assert_held(q);
-
-	if (!test_bit(flag, &q->queue_flags)) {
-		__set_bit(flag, &q->queue_flags);
-		return 0;
-	}
-
-	return 1;
-}
-
-static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
-{
-	queue_lockdep_assert_held(q);
-	__set_bit(flag, &q->queue_flags);
-}
-
-static inline void queue_flag_clear_unlocked(unsigned int flag,
-					     struct request_queue *q)
-{
-	__clear_bit(flag, &q->queue_flags);
-}
-
-static inline int queue_in_flight(struct request_queue *q)
-{
-	return q->in_flight[0] + q->in_flight[1];
-}
-
-static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
-{
-	queue_lockdep_assert_held(q);
-	__clear_bit(flag, &q->queue_flags);
-}
+void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
+void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
+bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
+bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
@@ -757,6 +741,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 extern int blk_set_preempt_only(struct request_queue *q);
 extern void blk_clear_preempt_only(struct request_queue *q);
 
+static inline int queue_in_flight(struct request_queue *q)
+{
+	return q->in_flight[0] + q->in_flight[1];
+}
+
 static inline bool blk_account_rq(struct request *rq)
 {
 	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
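The removed queue_flag_* helpers used non-atomic __set_bit()/__clear_bit() and therefore required every caller to hold queue_lock (or to know the queue was not yet visible); the new blk_queue_flag_*() functions move that serialization behind a single implementation in block/blk-core.c. The test-and-set semantics callers rely on can be modeled in userspace with one atomic read-modify-write; a sketch only, with names that mirror the new API rather than reproduce the kernel code:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static _Atomic unsigned long queue_flags;

	static bool flag_test_and_set(unsigned int flag)
	{
		unsigned long mask = 1UL << flag;

		/* atomic fetch-or: no window where two callers both see 0 */
		return atomic_fetch_or(&queue_flags, mask) & mask;
	}

	int main(void)
	{
		printf("first caller saw flag set: %d\n",
		       flag_test_and_set(5));	/* 0: we set it */
		printf("second caller saw flag set: %d\n",
		       flag_test_and_set(5));	/* 1: already set */
		return 0;
	}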
@@ -807,6 +796,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+	return q->nr_zones;
+}
+
+static inline unsigned int blk_queue_zone_no(struct request_queue *q,
+					     sector_t sector)
+{
+	if (!blk_queue_is_zoned(q))
+		return 0;
+	return sector >> ilog2(q->limits.chunk_sectors);
+}
+
+static inline bool blk_queue_zone_is_seq(struct request_queue *q,
+					 sector_t sector)
+{
+	if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
+		return false;
+	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
+}
+
 static inline bool rq_is_sync(struct request *rq)
 {
 	return op_is_sync(rq->cmd_flags);
@@ -1012,6 +1022,19 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 }
 
 /*
+ * The basic unit of block I/O is a sector. It is used in a number of contexts
+ * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
+ * bytes. Variables of type sector_t represent an offset or size that is a
+ * multiple of 512 bytes. Hence these two constants.
+ */
+#ifndef SECTOR_SHIFT
+#define SECTOR_SHIFT 9
+#endif
+#ifndef SECTOR_SIZE
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#endif
+
+/*
  * blk_rq_pos()			: the current sector
  * blk_rq_bytes()		: bytes left in the entire request
 * blk_rq_cur_bytes()		: bytes left in the current segment
@@ -1038,12 +1061,22 @@ extern unsigned int blk_rq_err_bytes(const struct request *rq);
 
 static inline unsigned int blk_rq_sectors(const struct request *rq)
 {
-	return blk_rq_bytes(rq) >> 9;
+	return blk_rq_bytes(rq) >> SECTOR_SHIFT;
 }
 
 static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 {
-	return blk_rq_cur_bytes(rq) >> 9;
+	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
+}
+
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+}
+
+static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
+{
+	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
 }
 
 /*
@@ -1063,7 +1096,8 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 						     int op)
 {
 	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
-		return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+		return min(q->limits.max_discard_sectors,
+			   UINT_MAX >> SECTOR_SHIFT);
 
 	if (unlikely(op == REQ_OP_WRITE_SAME))
 		return q->limits.max_write_same_sectors;
@@ -1243,7 +1277,8 @@ extern long nr_blockdev_pages(void);
 bool __must_check blk_get_queue(struct request_queue *);
 struct request_queue *blk_alloc_queue(gfp_t);
-struct request_queue *blk_alloc_queue_node(gfp_t, int);
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
+					   spinlock_t *lock);
 extern void blk_put_queue(struct request_queue *);
 extern void blk_set_queue_dying(struct request_queue *);
@@ -1374,16 +1409,21 @@ extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 static inline int sb_issue_discard(struct super_block *sb, sector_t block,
 		sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
 {
-	return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9),
-				    nr_blocks << (sb->s_blocksize_bits - 9),
+	return blkdev_issue_discard(sb->s_bdev,
+				    block << (sb->s_blocksize_bits -
+					      SECTOR_SHIFT),
+				    nr_blocks << (sb->s_blocksize_bits -
+						  SECTOR_SHIFT),
 				    gfp_mask, flags);
 }
 static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
 		sector_t nr_blocks, gfp_t gfp_mask)
 {
 	return blkdev_issue_zeroout(sb->s_bdev,
-				    block << (sb->s_blocksize_bits - 9),
-				    nr_blocks << (sb->s_blocksize_bits - 9),
+				    block << (sb->s_blocksize_bits -
+					      SECTOR_SHIFT),
+				    nr_blocks << (sb->s_blocksize_bits -
+						  SECTOR_SHIFT),
 				    gfp_mask, 0);
 }
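With SECTOR_SHIFT in place, conversions such as the sb_issue_discard() change above read as intent rather than magic numbers: a filesystem block number becomes a 512-byte sector number by shifting left by (blocksize_bits - SECTOR_SHIFT). A quick userspace check of that arithmetic, assuming 4 KB filesystem blocks:

	#include <stdio.h>

	#define SECTOR_SHIFT	9
	#define SECTOR_SIZE	(1 << SECTOR_SHIFT)

	int main(void)
	{
		unsigned int blocksize_bits = 12;	/* 4096-byte fs blocks */
		unsigned long long block = 100;

		/* one fs block covers 2^(bits - SECTOR_SHIFT) sectors */
		unsigned long long sector = block << (blocksize_bits - SECTOR_SHIFT);

		printf("fs block %llu starts at sector %llu (byte %llu)\n",
		       block, sector, sector * SECTOR_SIZE);
		return 0;
	}

For 4 KB blocks this prints sector 800 and byte offset 409600, i.e. exactly 100 * 4096.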
@@ -1490,7 +1530,8 @@ static inline int queue_alignment_offset(struct request_queue *q)
 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
 {
 	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
-	unsigned int alignment = sector_div(sector, granularity >> 9) << 9;
+	unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
+		<< SECTOR_SHIFT;
 
 	return (granularity + lim->alignment_offset - alignment) % granularity;
 }
@@ -1524,8 +1565,8 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 		return 0;
 
 	/* Why are these in bytes, not sectors? */
-	alignment = lim->discard_alignment >> 9;
-	granularity = lim->discard_granularity >> 9;
+	alignment = lim->discard_alignment >> SECTOR_SHIFT;
+	granularity = lim->discard_granularity >> SECTOR_SHIFT;
 	if (!granularity)
 		return 0;
@@ -1536,7 +1577,7 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 	offset = (granularity + alignment - offset) % granularity;
 
 	/* Turn it back into bytes, gaah */
-	return offset << 9;
+	return offset << SECTOR_SHIFT;
 }
 
 static inline int bdev_discard_alignment(struct block_device *bdev)
@@ -1595,7 +1636,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 
 	if (q)
 		return blk_queue_zone_sectors(q);
+	return 0;
+}
+
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
 
+	if (q)
+		return blk_queue_nr_zones(q);
 	return 0;
 }
 
@@ -1731,8 +1780,6 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
 
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
-int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
 int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
 
 #ifdef CONFIG_BLK_CGROUP
@@ -1971,6 +2018,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+bool blk_req_needs_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_unlock(struct request *rq);
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+	if (blk_req_needs_zone_write_lock(rq))
+		__blk_req_zone_write_lock(rq);
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
+		__blk_req_zone_write_unlock(rq);
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return rq->q->seq_zones_wlock &&
+		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	if (!blk_req_needs_zone_write_lock(rq))
+		return true;
+	return !blk_req_zone_is_write_locked(rq);
+}
+#else
+static inline bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+	return false;
+}
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return false;
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	return true;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 #else /* CONFIG_BLOCK */
 
 struct block_device;
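The zone write locking API added above serializes writes to sequential zones: a write may be dispatched only if its target zone is not already write-locked in seq_zones_wlock, and RQF_ZONE_WRITE_LOCKED ensures the lock is dropped exactly once on completion. A userspace model of that trylock/unlock protocol over a bitmap (zone count and numbering are illustrative, and the atomics stand in for the kernel's test_and_set_bit()/test_and_clear_bit()):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static _Atomic unsigned long seq_zones_wlock;

	static bool zone_write_trylock(unsigned int zno)
	{
		unsigned long mask = 1UL << zno;

		/* mirrors test_and_set_bit() on q->seq_zones_wlock */
		return !(atomic_fetch_or(&seq_zones_wlock, mask) & mask);
	}

	static void zone_write_unlock(unsigned int zno)
	{
		/* mirrors clearing the bit on request completion */
		atomic_fetch_and(&seq_zones_wlock, ~(1UL << zno));
	}

	int main(void)
	{
		if (zone_write_trylock(3))
			printf("write to zone 3 dispatched\n");
		if (!zone_write_trylock(3))
			printf("second write to zone 3 must wait\n");
		zone_write_unlock(3);
		if (zone_write_trylock(3))
			printf("zone 3 writable again\n");
		return 0;
	}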