aboutsummaryrefslogtreecommitdiff
path: root/include/linux/blkdev.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/blkdev.h')
-rw-r--r--include/linux/blkdev.h295
1 files changed, 198 insertions, 97 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0ce8a372d506..9af3e0f430bc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,6 +27,8 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
+#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
struct module;
struct scsi_ioctl_command;
@@ -47,7 +49,7 @@ struct blk_stat_callback;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
-/* Must be consisitent with blk_mq_poll_stats_bkt() */
+/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
/*
@@ -121,6 +123,12 @@ typedef __u32 __bitwise req_flags_t;
/* Look at ->special_vec for the actual data payload instead of the
bio chain. */
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
+/* The per-zone write lock is held for this request */
+#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
+/* timeout is expired */
+#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20))
+/* already slept for hybrid poll */
+#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
@@ -133,12 +141,6 @@ typedef __u32 __bitwise req_flags_t;
* especially blk_mq_rq_ctx_init() to take care of the added fields.
*/
struct request {
- struct list_head queuelist;
- union {
- struct __call_single_data csd;
- u64 fifo_time;
- };
-
struct request_queue *q;
struct blk_mq_ctx *mq_ctx;
@@ -148,8 +150,6 @@ struct request {
int internal_tag;
- unsigned long atomic_flags;
-
/* the following two fields are internal, NEVER access directly */
unsigned int __data_len; /* total data len */
int tag;
@@ -158,6 +158,8 @@ struct request {
struct bio *bio;
struct bio *biotail;
+ struct list_head queuelist;
+
/*
* The hash is used inside the scheduler, and killed once the
* request reaches the dispatch list. The ipi_list is only used
@@ -205,19 +207,16 @@ struct request {
struct hd_struct *part;
unsigned long start_time;
struct blk_issue_stat issue_stat;
-#ifdef CONFIG_BLK_CGROUP
- struct request_list *rl; /* rl this rq is alloced from */
- unsigned long long start_time_ns;
- unsigned long long io_start_time_ns; /* when passed to hardware */
-#endif
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
+
#if defined(CONFIG_BLK_DEV_INTEGRITY)
unsigned short nr_integrity_segments;
#endif
+ unsigned short write_hint;
unsigned short ioprio;
unsigned int timeout;
@@ -226,11 +225,37 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
- unsigned short write_hint;
+ /*
+ * On blk-mq, the lower bits of ->gstate (generation number and
+ * state) carry the MQ_RQ_* state value and the upper bits the
+ * generation number which is monotonically incremented and used to
+ * distinguish the reuse instances.
+ *
+ * ->gstate_seq allows updates to ->gstate and other fields
+ * (currently ->deadline) during request start to be read
+ * atomically from the timeout path, so that it can operate on a
+ * coherent set of information.
+ */
+ seqcount_t gstate_seq;
+ u64 gstate;
+
+ /*
+ * ->aborted_gstate is used by the timeout to claim a specific
+ * recycle instance of this request. See blk_mq_timeout_work().
+ */
+ struct u64_stats_sync aborted_gstate_sync;
+ u64 aborted_gstate;
+
+ /* access through blk_rq_set_deadline, blk_rq_deadline */
+ unsigned long __deadline;
- unsigned long deadline;
struct list_head timeout_list;
+ union {
+ struct __call_single_data csd;
+ u64 fifo_time;
+ };
+
/*
* completion callback.
*/
@@ -239,6 +264,12 @@ struct request {
/* for bidi */
struct request *next_rq;
+
+#ifdef CONFIG_BLK_CGROUP
+ struct request_list *rl; /* rl this rq is alloced from */
+ unsigned long long start_time_ns;
+ unsigned long long io_start_time_ns; /* when passed to hardware */
+#endif
};
static inline bool blk_op_is_scsi(unsigned int op)
@@ -564,6 +595,22 @@ struct request_queue {
struct queue_limits limits;
/*
+ * Zoned block device information for request dispatch control.
+ * nr_zones is the total number of zones of the device. This is always
+ * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
+ * bits which indicates if a zone is conventional (bit clear) or
+ * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
+ * bits which indicates if a zone is write locked, that is, if a write
+ * request targeting the zone was dispatched. All three fields are
+ * initialized by the low level device driver (e.g. scsi/sd.c).
+ * Stacking drivers (device mappers) may or may not initialize
+ * these fields.
+ */
+ unsigned int nr_zones;
+ unsigned long *seq_zones_bitmap;
+ unsigned long *seq_zones_wlock;
+
+ /*
* sg stuff
*/
unsigned int sg_timeout;
@@ -660,73 +707,10 @@ struct request_queue {
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_POLL))
-/*
- * @q->queue_lock is set while a queue is being initialized. Since we know
- * that no other threads access the queue object before @q->queue_lock has
- * been set, it is safe to manipulate queue flags without holding the
- * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
- * blk_init_allocated_queue().
- */
-static inline void queue_lockdep_assert_held(struct request_queue *q)
-{
- if (q->queue_lock)
- lockdep_assert_held(q->queue_lock);
-}
-
-static inline void queue_flag_set_unlocked(unsigned int flag,
- struct request_queue *q)
-{
- __set_bit(flag, &q->queue_flags);
-}
-
-static inline int queue_flag_test_and_clear(unsigned int flag,
- struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
-
- if (test_bit(flag, &q->queue_flags)) {
- __clear_bit(flag, &q->queue_flags);
- return 1;
- }
-
- return 0;
-}
-
-static inline int queue_flag_test_and_set(unsigned int flag,
- struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
-
- if (!test_bit(flag, &q->queue_flags)) {
- __set_bit(flag, &q->queue_flags);
- return 0;
- }
-
- return 1;
-}
-
-static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
- __set_bit(flag, &q->queue_flags);
-}
-
-static inline void queue_flag_clear_unlocked(unsigned int flag,
- struct request_queue *q)
-{
- __clear_bit(flag, &q->queue_flags);
-}
-
-static inline int queue_in_flight(struct request_queue *q)
-{
- return q->in_flight[0] + q->in_flight[1];
-}
-
-static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
- __clear_bit(flag, &q->queue_flags);
-}
+void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
+void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
+bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
+bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
@@ -757,6 +741,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
extern int blk_set_preempt_only(struct request_queue *q);
extern void blk_clear_preempt_only(struct request_queue *q);
+static inline int queue_in_flight(struct request_queue *q)
+{
+ return q->in_flight[0] + q->in_flight[1];
+}
+
static inline bool blk_account_rq(struct request *rq)
{
return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
@@ -807,6 +796,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+ return q->nr_zones;
+}
+
+static inline unsigned int blk_queue_zone_no(struct request_queue *q,
+ sector_t sector)
+{
+ if (!blk_queue_is_zoned(q))
+ return 0;
+ return sector >> ilog2(q->limits.chunk_sectors);
+}
+
+static inline bool blk_queue_zone_is_seq(struct request_queue *q,
+ sector_t sector)
+{
+ if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
+ return false;
+ return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
+}
+
static inline bool rq_is_sync(struct request *rq)
{
return op_is_sync(rq->cmd_flags);
@@ -1012,6 +1022,19 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
}
/*
+ * The basic unit of block I/O is a sector. It is used in a number of contexts
+ * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
+ * bytes. Variables of type sector_t represent an offset or size that is a
+ * multiple of 512 bytes. Hence these two constants.
+ */
+#ifndef SECTOR_SHIFT
+#define SECTOR_SHIFT 9
+#endif
+#ifndef SECTOR_SIZE
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#endif
+
+/*
* blk_rq_pos() : the current sector
* blk_rq_bytes() : bytes left in the entire request
* blk_rq_cur_bytes() : bytes left in the current segment
@@ -1038,12 +1061,22 @@ extern unsigned int blk_rq_err_bytes(const struct request *rq);
static inline unsigned int blk_rq_sectors(const struct request *rq)
{
- return blk_rq_bytes(rq) >> 9;
+ return blk_rq_bytes(rq) >> SECTOR_SHIFT;
}
static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
{
- return blk_rq_cur_bytes(rq) >> 9;
+ return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
+}
+
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+ return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+}
+
+static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
+{
+ return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
}
/*
@@ -1063,7 +1096,8 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
int op)
{
if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
- return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+ return min(q->limits.max_discard_sectors,
+ UINT_MAX >> SECTOR_SHIFT);
if (unlikely(op == REQ_OP_WRITE_SAME))
return q->limits.max_write_same_sectors;
@@ -1243,7 +1277,8 @@ extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
struct request_queue *blk_alloc_queue(gfp_t);
-struct request_queue *blk_alloc_queue_node(gfp_t, int);
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
+ spinlock_t *lock);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
@@ -1374,16 +1409,21 @@ extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
{
- return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9),
- nr_blocks << (sb->s_blocksize_bits - 9),
+ return blkdev_issue_discard(sb->s_bdev,
+ block << (sb->s_blocksize_bits -
+ SECTOR_SHIFT),
+ nr_blocks << (sb->s_blocksize_bits -
+ SECTOR_SHIFT),
gfp_mask, flags);
}
static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
sector_t nr_blocks, gfp_t gfp_mask)
{
return blkdev_issue_zeroout(sb->s_bdev,
- block << (sb->s_blocksize_bits - 9),
- nr_blocks << (sb->s_blocksize_bits - 9),
+ block << (sb->s_blocksize_bits -
+ SECTOR_SHIFT),
+ nr_blocks << (sb->s_blocksize_bits -
+ SECTOR_SHIFT),
gfp_mask, 0);
}
@@ -1490,7 +1530,8 @@ static inline int queue_alignment_offset(struct request_queue *q)
static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
{
unsigned int granularity = max(lim->physical_block_size, lim->io_min);
- unsigned int alignment = sector_div(sector, granularity >> 9) << 9;
+ unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
+ << SECTOR_SHIFT;
return (granularity + lim->alignment_offset - alignment) % granularity;
}
@@ -1524,8 +1565,8 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
return 0;
/* Why are these in bytes, not sectors? */
- alignment = lim->discard_alignment >> 9;
- granularity = lim->discard_granularity >> 9;
+ alignment = lim->discard_alignment >> SECTOR_SHIFT;
+ granularity = lim->discard_granularity >> SECTOR_SHIFT;
if (!granularity)
return 0;
@@ -1536,7 +1577,7 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
offset = (granularity + alignment - offset) % granularity;
/* Turn it back into bytes, gaah */
- return offset << 9;
+ return offset << SECTOR_SHIFT;
}
static inline int bdev_discard_alignment(struct block_device *bdev)
@@ -1595,7 +1636,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
if (q)
return blk_queue_zone_sectors(q);
+ return 0;
+}
+
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ if (q)
+ return blk_queue_nr_zones(q);
return 0;
}
@@ -1731,8 +1780,6 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
int kblockd_schedule_work(struct work_struct *work);
int kblockd_schedule_work_on(int cpu, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
-int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
@@ -1971,6 +2018,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
struct writeback_control *);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+bool blk_req_needs_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_unlock(struct request *rq);
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+ if (blk_req_needs_zone_write_lock(rq))
+ __blk_req_zone_write_lock(rq);
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+ if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
+ __blk_req_zone_write_unlock(rq);
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+ return rq->q->seq_zones_wlock &&
+ test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+ if (!blk_req_needs_zone_write_lock(rq))
+ return true;
+ return !blk_req_zone_is_write_locked(rq);
+}
+#else
+static inline bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+ return false;
+}
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+}
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+ return false;
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+ return true;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
#else /* CONFIG_BLOCK */
struct block_device;