aboutsummaryrefslogtreecommitdiff
path: root/block/blk.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 13:33:06 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 13:33:06 +0200
commit26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4 (patch)
treea08d01893b603d2f611a617f6055b54a835c03f0 /block/blk.h
parent3a4d319a8fb5a9bbdf5b31ef32841eb286b1dcc2 (diff)
parentd4d7c03f7ee1d7f16b7b6e885b1e00968f72b93c (diff)
Merge tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - MD changes via Song: - md-bitmap refactoring (Yu Kuai) - raid5 performance optimization (Artur Paszkiewicz) - Other small fixes (Yu Kuai, Chen Ni) - Add a sysfs entry 'new_level' (Xiao Ni) - Improve information reported in /proc/mdstat (Mateusz Kusiak) - NVMe changes via Keith: - Asynchronous namespace scanning (Stuart) - TCP TLS updates (Hannes) - RDMA queue controller validation (Niklas) - Align field names to the spec (Anuj) - Metadata support validation (Puranjay) - A syntax cleanup (Shen) - Fix a Kconfig linking error (Arnd) - New queue-depth quirk (Keith) - Add missing unplug trace event (Keith) - blk-iocost fixes (Colin, Konstantin) - t10-pi modular removal and fixes (Alexey) - Fix for potential BLKSECDISCARD overflow (Alexey) - bio splitting cleanups and fixes (Christoph) - Deal with folios rather than rather than pages, speeding up how the block layer handles bigger IOs (Kundan) - Use spinlocks rather than bit spinlocks in zram (Sebastian, Mike) - Reduce zoned device overhead in ublk (Ming) - Add and use sendpages_ok() for drbd and nvme-tcp (Ofir) - Fix regression in partition error pointer checking (Riyan) - Add support for write zeroes and rotational status in nbd (Wouter) - Add Yu Kuai as new BFQ maintainer. The scheduler has been unmaintained for quite a while. - Various sets of fixes for BFQ (Yu Kuai) - Misc fixes and cleanups (Alvaro, Christophe, Li, Md Haris, Mikhail, Yang) * tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux: (120 commits) nvme-pci: qdepth 1 quirk block: fix potential invalid pointer dereference in blk_add_partition blk_iocost: make read-only static array vrate_adj_pct const block: unpin user pages belonging to a folio at once mm: release number of pages of a folio block: introduce folio awareness and add a bigger size from folio block: Added folio-ized version of bio_add_hw_page() block, bfq: factor out a helper to split bfqq in bfq_init_rq() block, bfq: remove local variable 'bfqq_already_existing' in bfq_init_rq() block, bfq: remove local variable 'split' in bfq_init_rq() block, bfq: remove bfq_log_bfqg() block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator() block, bfq: fix procress reference leakage for bfqq in merge chain block, bfq: fix uaf for accessing waker_bfqq after splitting blk-throttle: support prioritized processing of metadata blk-throttle: remove last_low_overflow_time drbd: Add NULL check for net_conf to prevent dereference in state validation nvme-tcp: fix link failure for TCP auth blk-mq: add missing unplug trace event mtip32xx: Remove redundant null pointer checks in mtip_hw_debugfs_init() ...
Diffstat (limited to 'block/blk.h')
-rw-r--r--block/blk.h74
1 files changed, 56 insertions, 18 deletions
diff --git a/block/blk.h b/block/blk.h
index e180863f918b..86affb583eb6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -331,33 +331,67 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
-static inline bool bio_may_exceed_limits(struct bio *bio,
- const struct queue_limits *lim)
+struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
+ unsigned *nsegs);
+struct bio *bio_split_write_zeroes(struct bio *bio,
+ const struct queue_limits *lim, unsigned *nsegs);
+struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
+ unsigned *nr_segs);
+struct bio *bio_split_zone_append(struct bio *bio,
+ const struct queue_limits *lim, unsigned *nr_segs);
+
+/*
+ * All drivers must accept single-segments bios that are smaller than PAGE_SIZE.
+ *
+ * This is a quick and dirty check that relies on the fact that bi_io_vec[0] is
+ * always valid if a bio has data. The check might lead to occasional false
+ * positives when bios are cloned, but compared to the performance impact of
+ * cloned bios themselves the loop below doesn't matter anyway.
+ */
+static inline bool bio_may_need_split(struct bio *bio,
+ const struct queue_limits *lim)
+{
+ return lim->chunk_sectors || bio->bi_vcnt != 1 ||
+ bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
+}
+
+/**
+ * __bio_split_to_limits - split a bio to fit the queue limits
+ * @bio: bio to be split
+ * @lim: queue limits to split based on
+ * @nr_segs: returns the number of segments in the returned bio
+ *
+ * Check if @bio needs splitting based on the queue limits, and if so split off
+ * a bio fitting the limits from the beginning of @bio and return it. @bio is
+ * shortened to the remainder and re-submitted.
+ *
+ * The split bio is allocated from @q->bio_split, which is provided by the
+ * block layer.
+ */
+static inline struct bio *__bio_split_to_limits(struct bio *bio,
+ const struct queue_limits *lim, unsigned int *nr_segs)
{
switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ if (bio_may_need_split(bio, lim))
+ return bio_split_rw(bio, lim, nr_segs);
+ *nr_segs = 1;
+ return bio;
+ case REQ_OP_ZONE_APPEND:
+ return bio_split_zone_append(bio, lim, nr_segs);
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
+ return bio_split_discard(bio, lim, nr_segs);
case REQ_OP_WRITE_ZEROES:
- return true; /* non-trivial splitting decisions */
+ return bio_split_write_zeroes(bio, lim, nr_segs);
default:
- break;
+ /* other operations can't be split */
+ *nr_segs = 0;
+ return bio;
}
-
- /*
- * All drivers must accept single-segments bios that are <= PAGE_SIZE.
- * This is a quick and dirty check that relies on the fact that
- * bi_io_vec[0] is always valid if a bio has data. The check might
- * lead to occasional false negatives when bios are cloned, but compared
- * to the performance impact of cloned bios themselves the loop below
- * doesn't matter anyway.
- */
- return lim->chunk_sectors || bio->bi_vcnt != 1 ||
- bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
}
-struct bio *__bio_split_to_limits(struct bio *bio,
- const struct queue_limits *lim,
- unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
@@ -540,6 +574,10 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
+int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
+ struct folio *folio, size_t len, size_t offset,
+ unsigned int max_sectors, bool *same_page);
+
/*
* Clean up a page appropriately, where the page may be pinned, may have a
* ref taken on it or neither.