Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Makefile              |   3
-rw-r--r--  drivers/md/bcache/super.c        |  27
-rw-r--r--  drivers/md/dm-bufio.c            |  26
-rw-r--r--  drivers/md/dm-cache-metadata.h   |   2
-rw-r--r--  drivers/md/dm-cache-target.c     |   2
-rw-r--r--  drivers/md/dm-core.h             |  22
-rw-r--r--  drivers/md/dm-ebs-target.c       |  15
-rw-r--r--  drivers/md/dm-era-target.c       |   8
-rw-r--r--  drivers/md/dm-flakey.c           |   8
-rw-r--r--  drivers/md/dm-ima.c              |   5
-rw-r--r--  drivers/md/dm-integrity.c        |  76
-rw-r--r--  drivers/md/dm-io-rewind.c        | 166
-rw-r--r--  drivers/md/dm-io.c               |  38
-rw-r--r--  drivers/md/dm-ioctl.c            |   6
-rw-r--r--  drivers/md/dm-kcopyd.c           |  28
-rw-r--r--  drivers/md/dm-log.c              |  13
-rw-r--r--  drivers/md/dm-raid.c             |  45
-rw-r--r--  drivers/md/dm-raid1.c            |  12
-rw-r--r--  drivers/md/dm-rq.c               |   1
-rw-r--r--  drivers/md/dm-snap-persistent.c  |  25
-rw-r--r--  drivers/md/dm-snap.c             |   2
-rw-r--r--  drivers/md/dm-table.c            | 324
-rw-r--r--  drivers/md/dm-thin-metadata.c    |   7
-rw-r--r--  drivers/md/dm-thin.c             |   4
-rw-r--r--  drivers/md/dm-verity-loadpin.c   |  75
-rw-r--r--  drivers/md/dm-verity-target.c    |  40
-rw-r--r--  drivers/md/dm-verity.h           |   4
-rw-r--r--  drivers/md/dm-writecache.c       |  55
-rw-r--r--  drivers/md/dm-zone.c             |  95
-rw-r--r--  drivers/md/dm-zoned-metadata.c   |   5
-rw-r--r--  drivers/md/dm-zoned-target.c     |  25
-rw-r--r--  drivers/md/dm-zoned.h            |   2
-rw-r--r--  drivers/md/dm.c                  | 498
-rw-r--r--  drivers/md/dm.h                  |   4
-rw-r--r--  drivers/md/md-bitmap.c           |   6
-rw-r--r--  drivers/md/md.c                  |  30
-rw-r--r--  drivers/md/md.h                  |   5
-rw-r--r--  drivers/md/raid1.c               |  14
-rw-r--r--  drivers/md/raid10.c              |  22
-rw-r--r--  drivers/md/raid5-cache.c         |  12
-rw-r--r--  drivers/md/raid5-ppl.c           |  16
-rw-r--r--  drivers/md/raid5.c               |  13
42 files changed, 1095 insertions(+), 691 deletions(-)
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 0454b0885b01..84291e38dca8 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -5,7 +5,7 @@
dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
- dm-rq.o
+ dm-rq.o dm-io-rewind.o
dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-historical-service-time-y += dm-ps-historical-service-time.o
dm-io-affinity-y += dm-ps-io-affinity.o
@@ -83,6 +83,7 @@ obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o
obj-$(CONFIG_DM_ZONED) += dm-zoned.o
obj-$(CONFIG_DM_WRITECACHE) += dm-writecache.o
+obj-$(CONFIG_SECURITY_LOADPIN_VERITY) += dm-verity-loadpin.o
ifeq ($(CONFIG_DM_INIT),y)
dm-mod-objs += dm-init.o
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3563d15dbaf2..ba3909bb6bea 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -414,8 +414,8 @@ static void uuid_io_unlock(struct closure *cl)
up(&c->uuid_write_mutex);
}
-static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
- struct bkey *k, struct closure *parent)
+static void uuid_io(struct cache_set *c, blk_opf_t opf, struct bkey *k,
+ struct closure *parent)
{
struct closure *cl = &c->uuid_write;
struct uuid_entry *u;
@@ -429,22 +429,22 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
for (i = 0; i < KEY_PTRS(k); i++) {
struct bio *bio = bch_bbio_alloc(c);
- bio->bi_opf = REQ_SYNC | REQ_META | op_flags;
+ bio->bi_opf = opf | REQ_SYNC | REQ_META;
bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
bio->bi_end_io = uuid_endio;
bio->bi_private = cl;
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
bch_bio_map(bio, c->uuids);
bch_submit_bbio(bio, c, k, i);
- if (op != REQ_OP_WRITE)
+ if ((opf & REQ_OP_MASK) != REQ_OP_WRITE)
break;
}
bch_extent_to_text(buf, sizeof(buf), k);
- pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf);
+ pr_debug("%s UUIDs at %s\n", (opf & REQ_OP_MASK) == REQ_OP_WRITE ?
+ "wrote" : "read", buf);
for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
if (!bch_is_zero(u->uuid, 16))
@@ -463,7 +463,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
return "bad uuid pointer";
bkey_copy(&c->uuid_bucket, k);
- uuid_io(c, REQ_OP_READ, 0, k, cl);
+ uuid_io(c, REQ_OP_READ, k, cl);
if (j->version < BCACHE_JSET_VERSION_UUIDv1) {
struct uuid_entry_v0 *u0 = (void *) c->uuids;
@@ -511,7 +511,7 @@ static int __uuid_write(struct cache_set *c)
size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS;
SET_KEY_SIZE(&k.key, size);
- uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl);
+ uuid_io(c, REQ_OP_WRITE, &k.key, &cl);
closure_sync(&cl);
/* Only one bucket used for uuid write */
@@ -587,8 +587,7 @@ static void prio_endio(struct bio *bio)
closure_put(&ca->prio);
}
-static void prio_io(struct cache *ca, uint64_t bucket, int op,
- unsigned long op_flags)
+static void prio_io(struct cache *ca, uint64_t bucket, blk_opf_t opf)
{
struct closure *cl = &ca->prio;
struct bio *bio = bch_bbio_alloc(ca->set);
@@ -601,7 +600,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
bio->bi_end_io = prio_endio;
bio->bi_private = ca;
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
+ bio->bi_opf = opf | REQ_SYNC | REQ_META;
bch_bio_map(bio, ca->disk_buckets);
closure_bio_submit(ca->set, bio, &ca->prio);
@@ -661,7 +660,7 @@ int bch_prio_write(struct cache *ca, bool wait)
BUG_ON(bucket == -1);
mutex_unlock(&ca->set->bucket_lock);
- prio_io(ca, bucket, REQ_OP_WRITE, 0);
+ prio_io(ca, bucket, REQ_OP_WRITE);
mutex_lock(&ca->set->bucket_lock);
ca->prio_buckets[i] = bucket;
@@ -705,7 +704,7 @@ static int prio_read(struct cache *ca, uint64_t bucket)
ca->prio_last_buckets[bucket_nr] = bucket;
bucket_nr++;
- prio_io(ca, bucket, REQ_OP_READ, 0);
+ prio_io(ca, bucket, REQ_OP_READ);
if (p->csum !=
bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) {
@@ -884,7 +883,7 @@ static void bcache_device_free(struct bcache_device *d)
if (disk) {
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
- blk_cleanup_disk(disk);
+ put_disk(disk);
}
bioset_exit(&d->bio_split);
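
The bcache hunks above show the conversion pattern that recurs throughout this series: the separate op/op_flags pair is folded into a single blk_opf_t value, and the operation is recovered with REQ_OP_MASK only where the code needs to branch on it. A minimal sketch of that convention follows; the helper name is made up for illustration, while blk_opf_t, enum req_op and the REQ_* constants are the kernel definitions used in the hunks above.

    #include <linux/blk_types.h>

    /* Recover the operation encoded in a combined opf value. */
    static inline enum req_op example_opf_to_op(blk_opf_t opf)
    {
            return opf & REQ_OP_MASK;
    }

    /* Callers build the combined value directly, for example: */
    /*     bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;   */
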
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 5ffa1dcf84cf..dc01ce33265b 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -577,13 +577,12 @@ static void dmio_complete(unsigned long error, void *context)
b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
}
-static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
+static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
unsigned n_sectors, unsigned offset)
{
int r;
struct dm_io_request io_req = {
- .bi_op = rw,
- .bi_op_flags = 0,
+ .bi_opf = op,
.notify.fn = dmio_complete,
.notify.context = b,
.client = b->c->dm_io,
@@ -616,7 +615,7 @@ static void bio_complete(struct bio *bio)
b->end_io(b, status);
}
-static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
+static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
unsigned n_sectors, unsigned offset)
{
struct bio *bio;
@@ -630,10 +629,10 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
if (!bio) {
dmio:
- use_dmio(b, rw, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset);
return;
}
- bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw);
+ bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, op);
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = bio_complete;
bio->bi_private = b;
@@ -669,7 +668,8 @@ static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block
return sector;
}
-static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
+static void submit_io(struct dm_buffer *b, enum req_op op,
+ void (*end_io)(struct dm_buffer *, blk_status_t))
{
unsigned n_sectors;
sector_t sector;
@@ -679,7 +679,7 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff
sector = block_to_sector(b->c, b->block);
- if (rw != REQ_OP_WRITE) {
+ if (op != REQ_OP_WRITE) {
n_sectors = b->c->block_size >> SECTOR_SHIFT;
offset = 0;
} else {
@@ -698,9 +698,9 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff
}
if (b->data_mode != DATA_MODE_VMALLOC)
- use_bio(b, rw, sector, n_sectors, offset);
+ use_bio(b, op, sector, n_sectors, offset);
else
- use_dmio(b, rw, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset);
}
/*----------------------------------------------------------------
@@ -1341,8 +1341,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
struct dm_io_request io_req = {
- .bi_op = REQ_OP_WRITE,
- .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+ .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = c->dm_io,
@@ -1365,8 +1364,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count)
{
struct dm_io_request io_req = {
- .bi_op = REQ_OP_DISCARD,
- .bi_op_flags = REQ_SYNC,
+ .bi_opf = REQ_OP_DISCARD | REQ_SYNC,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = c->dm_io,
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 179ed5bf81a3..0905f2c1615e 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -131,7 +131,7 @@ void dm_cache_dump(struct dm_cache_metadata *cmd);
* hints will be lost.
*
* The hints are indexed by the cblock, but many policies will not
- * neccessarily have a fast way of accessing efficiently via cblock. So
+ * necessarily have a fast way of accessing efficiently via cblock. So
* rather than querying the policy for each cblock, we let it walk its data
* structures and fill in the hints in whatever order it wishes.
*/
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 28c5de8eca4a..54a8d5c9a44e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2775,7 +2775,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
/*
* The discard block size in the on disk metadata is not
- * neccessarily the same as we're currently using. So we have to
+ * necessarily the same as we're currently using. So we have to
* be careful to only set the discarded attribute if we know it
* covers a complete block of the new size.
*/
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 54c0473a51dd..6c6bd24774f2 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -22,6 +22,8 @@
#define DM_RESERVED_MAX_IOS 1024
+struct dm_io;
+
struct dm_kobject_holder {
struct kobject kobj;
struct completion completion;
@@ -91,6 +93,14 @@ struct mapped_device {
spinlock_t deferred_lock;
struct bio_list deferred;
+ /*
+ * requeue work context is needed for cloning one new bio
+ * to represent the dm_io to be requeued, since each
+ * dm_io may point to the original bio from FS.
+ */
+ struct work_struct requeue_work;
+ struct dm_io *requeue_list;
+
void *interface_ptr;
/*
@@ -216,6 +226,13 @@ struct dm_table {
#endif
};
+static inline struct dm_target *dm_table_get_target(struct dm_table *t,
+ unsigned int index)
+{
+ BUG_ON(index >= t->num_targets);
+ return t->targets + index;
+}
+
/*
* One of these is allocated per clone bio.
*/
@@ -230,6 +247,9 @@ struct dm_target_io {
sector_t old_sector;
struct bio clone;
};
+#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
+#define DM_IO_BIO_OFFSET \
+ (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
/*
* dm_target_io flags
@@ -299,6 +319,8 @@ static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit)
io->flags |= (1U << bit);
}
+void dm_io_rewind(struct dm_io *io, struct bio_set *bs);
+
static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
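
The dm-core.h additions above describe the front-padding layout of clone bios: DM_TARGET_IO_BIO_OFFSET and DM_IO_BIO_OFFSET record where the embedded clone sits inside struct dm_target_io and, in turn, inside struct dm_io. The dm.c hunks that actually consume these offsets are not part of this excerpt, so the following is only an orientation sketch of the pointer arithmetic the macros imply (helper names are hypothetical; it assumes the dm-core.h definitions are in scope, and that struct dm_io embeds its dm_target_io in a member named tio, as the offsetof() above indicates).

    /* From an embedded clone bio back to its dm_target_io ... */
    static inline struct dm_target_io *example_clone_to_tio(struct bio *clone)
    {
            return container_of(clone, struct dm_target_io, clone);
    }

    /* ... and from that embedded tio back to the owning dm_io. */
    static inline struct dm_io *example_tio_to_io(struct dm_target_io *tio)
    {
            return container_of(tio, struct dm_io, tio);
    }

    /* Equivalently: (char *)io == (char *)clone - DM_IO_BIO_OFFSET. */
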
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 0221fa63f888..223e8e1a7a13 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -61,7 +61,8 @@ static inline bool __ebs_check_bs(unsigned int bs)
*
* copy blocks between bufio blocks and bio vector's (partial/overlapping) pages.
*/
-static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bvec_iter *iter)
+static int __ebs_rw_bvec(struct ebs_c *ec, enum req_op op, struct bio_vec *bv,
+ struct bvec_iter *iter)
{
int r = 0;
unsigned char *ba, *pa;
@@ -81,7 +82,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
cur_len = min(dm_bufio_get_block_size(ec->bufio) - buf_off, bv_len);
/* Avoid reading for writes in case bio vector's page overwrites block completely. */
- if (rw == READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio))
+ if (op == REQ_OP_READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio))
ba = dm_bufio_read(ec->bufio, block, &b);
else
ba = dm_bufio_new(ec->bufio, block, &b);
@@ -95,7 +96,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
} else {
/* Copy data to/from bio to buffer if read/new was successful above. */
ba += buf_off;
- if (rw == READ) {
+ if (op == REQ_OP_READ) {
memcpy(pa, ba, cur_len);
flush_dcache_page(bv->bv_page);
} else {
@@ -117,14 +118,14 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
}
/* READ/WRITE: iterate bio vector's copying between (partial) pages and bufio blocks. */
-static int __ebs_rw_bio(struct ebs_c *ec, int rw, struct bio *bio)
+static int __ebs_rw_bio(struct ebs_c *ec, enum req_op op, struct bio *bio)
{
int r = 0, rr;
struct bio_vec bv;
struct bvec_iter iter;
bio_for_each_bvec(bv, bio, iter) {
- rr = __ebs_rw_bvec(ec, rw, &bv, &iter);
+ rr = __ebs_rw_bvec(ec, op, &bv, &iter);
if (rr)
r = rr;
}
@@ -205,10 +206,10 @@ static void __ebs_process_bios(struct work_struct *ws)
bio_list_for_each(bio, &bios) {
r = -EIO;
if (bio_op(bio) == REQ_OP_READ)
- r = __ebs_rw_bio(ec, READ, bio);
+ r = __ebs_rw_bio(ec, REQ_OP_READ, bio);
else if (bio_op(bio) == REQ_OP_WRITE) {
write = true;
- r = __ebs_rw_bio(ec, WRITE, bio);
+ r = __ebs_rw_bio(ec, REQ_OP_WRITE, bio);
} else if (bio_op(bio) == REQ_OP_DISCARD) {
__ebs_forget_bio(ec, bio);
r = __ebs_discard_bio(ec, bio);
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 1f6bf152b3c7..e92c1afc3677 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1400,7 +1400,7 @@ static void start_worker(struct era *era)
static void stop_worker(struct era *era)
{
atomic_set(&era->suspended, 1);
- flush_workqueue(era->wq);
+ drain_workqueue(era->wq);
}
/*----------------------------------------------------------------
@@ -1570,6 +1570,12 @@ static void era_postsuspend(struct dm_target *ti)
}
stop_worker(era);
+
+ r = metadata_commit(era->md);
+ if (r) {
+ DMERR("%s: metadata_commit failed", __func__);
+ /* FIXME: fail mode */
+ }
}
static int era_preresume(struct dm_target *ti)
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index f2305eb758a2..89fa7a68c6c4 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -32,7 +32,7 @@ struct flakey_c {
unsigned corrupt_bio_byte;
unsigned corrupt_bio_rw;
unsigned corrupt_bio_value;
- unsigned corrupt_bio_flags;
+ blk_opf_t corrupt_bio_flags;
};
enum feature_flag_bits {
@@ -145,7 +145,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
/*
* Only corrupt bios with these flags set.
*/
- r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error);
+ BUILD_BUG_ON(sizeof(fc->corrupt_bio_flags) !=
+ sizeof(unsigned int));
+ r = dm_read_arg(_args + 3, as,
+ (__force unsigned *)&fc->corrupt_bio_flags,
+ &ti->error);
if (r)
return r;
argc--;
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index 1842d3a958ef..a1bd7cd52b1b 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -208,7 +208,7 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
if (!target_data_buf)
goto error;
- num_targets = dm_table_get_num_targets(table);
+ num_targets = table->num_targets;
if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio))
goto error;
@@ -237,9 +237,6 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
for (i = 0; i < num_targets; i++) {
struct dm_target *ti = dm_table_get_target(table, i);
- if (!ti)
- goto error;
-
last_target_measured = 0;
/*
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 3d5a0ce123c9..c60f9b2ece2d 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -298,7 +298,7 @@ struct dm_integrity_io {
struct work_struct work;
struct dm_integrity_c *ic;
- enum req_opf op;
+ enum req_op op;
bool fua;
struct dm_integrity_range range;
@@ -551,14 +551,14 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr)
return 0;
}
-static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
+static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf)
{
struct dm_io_request io_req;
struct dm_io_region io_loc;
+ const enum req_op op = opf & REQ_OP_MASK;
int r;
- io_req.bi_op = op;
- io_req.bi_op_flags = op_flags;
+ io_req.bi_opf = opf;
io_req.mem.type = DM_IO_KMEM;
io_req.mem.ptr.addr = ic->sb;
io_req.notify.fn = NULL;
@@ -1050,8 +1050,9 @@ static void complete_journal_io(unsigned long error, void *context)
complete_journal_op(comp);
}
-static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags,
- unsigned sector, unsigned n_sectors, struct journal_completion *comp)
+static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf,
+ unsigned sector, unsigned n_sectors,
+ struct journal_completion *comp)
{
struct dm_io_request io_req;
struct dm_io_region io_loc;
@@ -1067,8 +1068,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags,
pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
- io_req.bi_op = op;
- io_req.bi_op_flags = op_flags;
+ io_req.bi_opf = opf;
io_req.mem.type = DM_IO_PAGE_LIST;
if (ic->journal_io)
io_req.mem.ptr.pl = &ic->journal_io[pl_index];
@@ -1088,7 +1088,8 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags,
r = dm_io(&io_req, 1, &io_loc, NULL);
if (unlikely(r)) {
- dm_integrity_io_error(ic, op == REQ_OP_READ ? "reading journal" : "writing journal", r);
+ dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ?
+ "reading journal" : "writing journal", r);
if (comp) {
WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
complete_journal_io(-1UL, comp);
@@ -1096,15 +1097,16 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags,
}
}
-static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
- unsigned n_sections, struct journal_completion *comp)
+static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf,
+ unsigned section, unsigned n_sections,
+ struct journal_completion *comp)
{
unsigned sector, n_sectors;
sector = section * ic->journal_section_sectors;
n_sectors = n_sections * ic->journal_section_sectors;
- rw_journal_sectors(ic, op, op_flags, sector, n_sectors, comp);
+ rw_journal_sectors(ic, opf, sector, n_sectors, comp);
}
static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections)
@@ -1129,7 +1131,7 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi
for (i = 0; i < commit_sections; i++)
rw_section_mac(ic, commit_start + i, true);
}
- rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start,
+ rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start,
commit_sections, &io_comp);
} else {
unsigned to_end;
@@ -1141,7 +1143,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi
crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1);
if (try_wait_for_completion(&crypt_comp_1.comp)) {
- rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
+ rw_journal(ic, REQ_OP_WRITE | REQ_FUA,
+ commit_start, to_end, &io_comp);
reinit_completion(&crypt_comp_1.comp);
crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1);
@@ -1152,17 +1155,17 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi
crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
wait_for_completion_io(&crypt_comp_1.comp);
- rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
+ rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
wait_for_completion_io(&crypt_comp_2.comp);
}
} else {
for (i = 0; i < to_end; i++)
rw_section_mac(ic, commit_start + i, true);
- rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
+ rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
for (i = 0; i < commit_sections - to_end; i++)
rw_section_mac(ic, i, true);
}
- rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp);
+ rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp);
}
wait_for_completion_io(&io_comp.comp);
@@ -1188,8 +1191,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig
pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
- io_req.bi_op = REQ_OP_WRITE;
- io_req.bi_op_flags = 0;
+ io_req.bi_opf = REQ_OP_WRITE;
io_req.mem.type = DM_IO_PAGE_LIST;
io_req.mem.ptr.pl = &ic->journal[pl_index];
io_req.mem.offset = pl_offset;
@@ -1516,8 +1518,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat
if (!ic->meta_dev)
flush_data = false;
if (flush_data) {
- fr.io_req.bi_op = REQ_OP_WRITE,
- fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+ fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
fr.io_req.mem.type = DM_IO_KMEM,
fr.io_req.mem.ptr.addr = NULL,
fr.io_req.notify.fn = flush_notify,
@@ -2626,7 +2627,7 @@ static void recalc_write_super(struct dm_integrity_c *ic)
if (dm_integrity_failed(ic))
return;
- r = sync_rw_sb(ic, REQ_OP_WRITE, 0);
+ r = sync_rw_sb(ic, REQ_OP_WRITE);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
}
@@ -2706,8 +2707,7 @@ next_chunk:
if (unlikely(dm_integrity_failed(ic)))
goto err;
- io_req.bi_op = REQ_OP_READ;
- io_req.bi_op_flags = 0;
+ io_req.bi_opf = REQ_OP_READ;
io_req.mem.type = DM_IO_VMA;
io_req.mem.ptr.addr = ic->recalc_buffer;
io_req.notify.fn = NULL;
@@ -2800,7 +2800,7 @@ static void bitmap_block_work(struct work_struct *w)
if (bio_list_empty(&waiting))
return;
- rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC,
+ rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC,
bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT),
BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
@@ -2846,7 +2846,7 @@ static void bitmap_flush_work(struct work_struct *work)
block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
- rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+ rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
spin_lock_irq(&ic->endio_wait.lock);
@@ -2918,7 +2918,7 @@ static void replay_journal(struct dm_integrity_c *ic)
if (!ic->just_formatted) {
DEBUG_print("reading journal\n");
- rw_journal(ic, REQ_OP_READ, 0, 0, ic->journal_sections, NULL);
+ rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL);
if (ic->journal_io)
DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal");
if (ic->journal_io) {
@@ -3113,7 +3113,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
/* set to 0 to test bitmap replay code */
init_journal(ic, 0, ic->journal_sections, 0);
ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
- r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
#endif
@@ -3136,23 +3136,23 @@ static void dm_integrity_resume(struct dm_target *ti)
if (ic->provided_data_sectors > old_provided_data_sectors &&
ic->mode == 'B' &&
ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
- rw_journal_sectors(ic, REQ_OP_READ, 0, 0,
+ rw_journal_sectors(ic, REQ_OP_READ, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
block_bitmap_op(ic, ic->journal, old_provided_data_sectors,
ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET);
- rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+ rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
}
ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
- r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
}
if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
DEBUG_print("resume dirty_bitmap\n");
- rw_journal_sectors(ic, REQ_OP_READ, 0, 0,
+ rw_journal_sectors(ic, REQ_OP_READ, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
if (ic->mode == 'B') {
if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
@@ -3171,7 +3171,7 @@ static void dm_integrity_resume(struct dm_target *ti)
block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
- rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+ rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
ic->sb->recalc_sector = cpu_to_le64(0);
@@ -3187,7 +3187,7 @@ static void dm_integrity_resume(struct dm_target *ti)
replay_journal(ic);
ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
}
- r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
} else {
@@ -3199,7 +3199,7 @@ static void dm_integrity_resume(struct dm_target *ti)
if (ic->mode == 'B') {
ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
- r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
@@ -3215,7 +3215,7 @@ static void dm_integrity_resume(struct dm_target *ti)
block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
}
- rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+ rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
}
}
@@ -4256,7 +4256,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- r = sync_rw_sb(ic, REQ_OP_READ, 0);
+ r = sync_rw_sb(ic, REQ_OP_READ);
if (r) {
ti->error = "Error reading superblock";
goto bad;
@@ -4500,7 +4500,7 @@ try_smaller_buffer:
ti->error = "Error initializing journal";
goto bad;
}
- r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
if (r) {
ti->error = "Error initializing superblock";
goto bad;
diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c
new file mode 100644
index 000000000000..0db53ccb94ba
--- /dev/null
+++ b/drivers/md/dm-io-rewind.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022 Red Hat, Inc.
+ */
+
+#include <linux/bio.h>
+#include <linux/blk-crypto.h>
+#include <linux/blk-integrity.h>
+
+#include "dm-core.h"
+
+static inline bool dm_bvec_iter_rewind(const struct bio_vec *bv,
+ struct bvec_iter *iter,
+ unsigned int bytes)
+{
+ int idx;
+
+ iter->bi_size += bytes;
+ if (bytes <= iter->bi_bvec_done) {
+ iter->bi_bvec_done -= bytes;
+ return true;
+ }
+
+ bytes -= iter->bi_bvec_done;
+ idx = iter->bi_idx - 1;
+
+ while (idx >= 0 && bytes && bytes > bv[idx].bv_len) {
+ bytes -= bv[idx].bv_len;
+ idx--;
+ }
+
+ if (WARN_ONCE(idx < 0 && bytes,
+ "Attempted to rewind iter beyond bvec's boundaries\n")) {
+ iter->bi_size -= bytes;
+ iter->bi_bvec_done = 0;
+ iter->bi_idx = 0;
+ return false;
+ }
+
+ iter->bi_idx = idx;
+ iter->bi_bvec_done = bv[idx].bv_len - bytes;
+ return true;
+}
+
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+/**
+ * dm_bio_integrity_rewind - Rewind integrity vector
+ * @bio: bio whose integrity vector to update
+ * @bytes_done: number of data bytes to rewind
+ *
+ * Description: This function calculates how many integrity bytes the
+ * number of completed data bytes corresponds to and rewinds the
+ * integrity vector accordingly.
+ */
+static void dm_bio_integrity_rewind(struct bio *bio, unsigned int bytes_done)
+{
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
+
+ bip->bip_iter.bi_sector -= bio_integrity_intervals(bi, bytes_done >> 9);
+ dm_bvec_iter_rewind(bip->bip_vec, &bip->bip_iter, bytes);
+}
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+
+static inline void dm_bio_integrity_rewind(struct bio *bio,
+ unsigned int bytes_done)
+{
+ return;
+}
+
+#endif
+
+#if defined(CONFIG_BLK_INLINE_ENCRYPTION)
+
+/* Decrements @dun by @dec, treating @dun as a multi-limb integer. */
+static void dm_bio_crypt_dun_decrement(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+ unsigned int dec)
+{
+ int i;
+
+ for (i = 0; dec && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
+ u64 prev = dun[i];
+
+ dun[i] -= dec;
+ if (dun[i] > prev)
+ dec = 1;
+ else
+ dec = 0;
+ }
+}
+
+static void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
+{
+ struct bio_crypt_ctx *bc = bio->bi_crypt_context;
+
+ dm_bio_crypt_dun_decrement(bc->bc_dun,
+ bytes >> bc->bc_key->data_unit_size_bits);
+}
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static inline void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
+{
+ return;
+}
+
+#endif
+
+static inline void dm_bio_rewind_iter(const struct bio *bio,
+ struct bvec_iter *iter, unsigned int bytes)
+{
+ iter->bi_sector -= bytes >> 9;
+
+ /* No advance means no rewind */
+ if (bio_no_advance_iter(bio))
+ iter->bi_size += bytes;
+ else
+ dm_bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
+}
+
+/**
+ * dm_bio_rewind - update ->bi_iter of @bio by rewinding @bytes.
+ * @bio: bio to rewind
+ * @bytes: how many bytes to rewind
+ *
+ * WARNING:
+ * Caller must ensure that @bio has a fixed end sector, to allow
+ * rewinding from end of bio and restoring its original position.
+ * Caller is also responsible for restoring bio's size.
+ */
+static void dm_bio_rewind(struct bio *bio, unsigned bytes)
+{
+ if (bio_integrity(bio))
+ dm_bio_integrity_rewind(bio, bytes);
+
+ if (bio_has_crypt_ctx(bio))
+ dm_bio_crypt_rewind(bio, bytes);
+
+ dm_bio_rewind_iter(bio, &bio->bi_iter, bytes);
+}
+
+void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
+{
+ struct bio *orig = io->orig_bio;
+ struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
+ GFP_NOIO, bs);
+ /*
+ * dm_bio_rewind can restore to previous position since the
+ * end sector is fixed for original bio, but we still need
+ * to restore bio's size manually (using io->sectors).
+ */
+ dm_bio_rewind(new_orig, ((io->sector_offset << 9) -
+ orig->bi_iter.bi_size));
+ bio_trim(new_orig, 0, io->sectors);
+
+ bio_chain(new_orig, orig);
+ /*
+ * __bi_remaining was increased (by dm_split_and_process_bio),
+ * so must drop the one added in bio_chain.
+ */
+ atomic_dec(&orig->__bi_remaining);
+ io->orig_bio = new_orig;
+}
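
For reference, the borrow handling in dm_bio_crypt_dun_decrement() above behaves like ordinary multi-word subtraction. A worked example with hypothetical values (the helper is static to this file, so this is a behavioural sketch rather than callable code):

    u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { 0, 1 };      /* represents 2^64 */

    dm_bio_crypt_dun_decrement(dun, 1);
    /*
     * dun[0] wraps to 0xffffffffffffffff; because the new value is greater
     * than the previous one, a borrow of 1 propagates to dun[1], which
     * drops to 0: the array now represents 2^64 - 1.
     */
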
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index e4b95eaeec8c..783564533459 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -293,7 +293,7 @@ static void km_dp_init(struct dpages *dp, void *data)
/*-----------------------------------------------------------------
* IO routines that accept a list of pages.
*---------------------------------------------------------------*/
-static void do_region(int op, int op_flags, unsigned region,
+static void do_region(const blk_opf_t opf, unsigned region,
struct dm_io_region *where, struct dpages *dp,
struct io *io)
{
@@ -306,6 +306,7 @@ static void do_region(int op, int op_flags, unsigned region,
struct request_queue *q = bdev_get_queue(where->bdev);
sector_t num_sectors;
unsigned int special_cmd_max_sectors;
+ const enum req_op op = opf & REQ_OP_MASK;
/*
* Reject unsupported discard and write same requests.
@@ -339,8 +340,8 @@ static void do_region(int op, int op_flags, unsigned region,
(PAGE_SIZE >> SECTOR_SHIFT)));
}
- bio = bio_alloc_bioset(where->bdev, num_bvecs, op | op_flags,
- GFP_NOIO, &io->client->bios);
+ bio = bio_alloc_bioset(where->bdev, num_bvecs, opf, GFP_NOIO,
+ &io->client->bios);
bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
bio->bi_end_io = endio;
store_io_and_region_in_bio(bio, io, region);
@@ -368,7 +369,7 @@ static void do_region(int op, int op_flags, unsigned region,
} while (remaining);
}
-static void dispatch_io(int op, int op_flags, unsigned int num_regions,
+static void dispatch_io(blk_opf_t opf, unsigned int num_regions,
struct dm_io_region *where, struct dpages *dp,
struct io *io, int sync)
{
@@ -378,7 +379,7 @@ static void dispatch_io(int op, int op_flags, unsigned int num_regions,
BUG_ON(num_regions > DM_IO_MAX_REGIONS);
if (sync)
- op_flags |= REQ_SYNC;
+ opf |= REQ_SYNC;
/*
* For multiple regions we need to be careful to rewind
@@ -386,8 +387,8 @@ static void dispatch_io(int op, int op_flags, unsigned int num_regions,
*/
for (i = 0; i < num_regions; i++) {
*dp = old_pages;
- if (where[i].count || (op_flags & REQ_PREFLUSH))
- do_region(op, op_flags, i, where + i, dp, io);
+ if (where[i].count || (opf & REQ_PREFLUSH))
+ do_region(opf, i, where + i, dp, io);
}
/*
@@ -411,13 +412,13 @@ static void sync_io_complete(unsigned long error, void *context)
}
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
- struct dm_io_region *where, int op, int op_flags,
- struct dpages *dp, unsigned long *error_bits)
+ struct dm_io_region *where, blk_opf_t opf, struct dpages *dp,
+ unsigned long *error_bits)
{
struct io *io;
struct sync_io sio;
- if (num_regions > 1 && !op_is_write(op)) {
+ if (num_regions > 1 && !op_is_write(opf)) {
WARN_ON(1);
return -EIO;
}
@@ -434,7 +435,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
io->vma_invalidate_address = dp->vma_invalidate_address;
io->vma_invalidate_size = dp->vma_invalidate_size;
- dispatch_io(op, op_flags, num_regions, where, dp, io, 1);
+ dispatch_io(opf, num_regions, where, dp, io, 1);
wait_for_completion_io(&sio.wait);
@@ -445,12 +446,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,
- struct dm_io_region *where, int op, int op_flags,
+ struct dm_io_region *where, blk_opf_t opf,
struct dpages *dp, io_notify_fn fn, void *context)
{
struct io *io;
- if (num_regions > 1 && !op_is_write(op)) {
+ if (num_regions > 1 && !op_is_write(opf)) {
WARN_ON(1);
fn(1, context);
return -EIO;
@@ -466,7 +467,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io->vma_invalidate_address = dp->vma_invalidate_address;
io->vma_invalidate_size = dp->vma_invalidate_size;
- dispatch_io(op, op_flags, num_regions, where, dp, io, 0);
+ dispatch_io(opf, num_regions, where, dp, io, 0);
return 0;
}
@@ -489,7 +490,7 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
case DM_IO_VMA:
flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
- if (io_req->bi_op == REQ_OP_READ) {
+ if ((io_req->bi_opf & REQ_OP_MASK) == REQ_OP_READ) {
dp->vma_invalidate_address = io_req->mem.ptr.vma;
dp->vma_invalidate_size = size;
}
@@ -519,11 +520,10 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
if (!io_req->notify.fn)
return sync_io(io_req->client, num_regions, where,
- io_req->bi_op, io_req->bi_op_flags, &dp,
- sync_error_bits);
+ io_req->bi_opf, &dp, sync_error_bits);
- return async_io(io_req->client, num_regions, where, io_req->bi_op,
- io_req->bi_op_flags, &dp, io_req->notify.fn,
+ return async_io(io_req->client, num_regions, where,
+ io_req->bi_opf, &dp, io_req->notify.fn,
io_req->notify.context);
}
EXPORT_SYMBOL(dm_io);
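
With the dm-io.c changes above, callers describe an I/O with a single bi_opf field. A consolidated usage sketch, mirroring the converted call sites elsewhere in this diff (the function and variable names are illustrative, not from the patch; a zero-length region plus REQ_PREFLUSH issues a flush, as in dm_bufio_issue_flush above):

    #include <linux/dm-io.h>

    static int example_issue_flush(struct dm_io_client *io_client,
                                   struct block_device *bdev)
    {
            struct dm_io_region where = {
                    .bdev = bdev,
                    .sector = 0,
                    .count = 0,             /* flush only, no data */
            };
            struct dm_io_request io_req = {
                    .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
                    .mem.type = DM_IO_KMEM,
                    .mem.ptr.addr = NULL,
                    .notify.fn = NULL,      /* NULL notify.fn selects the synchronous path */
                    .client = io_client,
            };

            return dm_io(&io_req, 1, &where, NULL);
    }
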
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 87310fceb0d8..98976aaa9db9 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -832,7 +832,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) {
if (get_disk_ro(disk))
param->flags |= DM_READONLY_FLAG;
- param->target_count = dm_table_get_num_targets(table);
+ param->target_count = table->num_targets;
}
param->flags |= DM_ACTIVE_PRESENT_FLAG;
@@ -845,7 +845,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
if (table) {
if (!(dm_table_get_mode(table) & FMODE_WRITE))
param->flags |= DM_READONLY_FLAG;
- param->target_count = dm_table_get_num_targets(table);
+ param->target_count = table->num_targets;
}
dm_put_live_table(md, srcu_idx);
}
@@ -1248,7 +1248,7 @@ static void retrieve_status(struct dm_table *table,
type = STATUSTYPE_INFO;
/* Get all the target info */
- num_targets = dm_table_get_num_targets(table);
+ num_targets = table->num_targets;
for (i = 0; i < num_targets; i++) {
struct dm_target *ti = dm_table_get_target(table, i);
size_t l;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 37b03ab7e5c9..4d3bbbea2e9a 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -219,7 +219,7 @@ static struct page_list *alloc_pl(gfp_t gfp)
if (!pl)
return NULL;
- pl->page = alloc_page(gfp);
+ pl->page = alloc_page(gfp | __GFP_HIGHMEM);
if (!pl->page) {
kfree(pl);
return NULL;
@@ -350,9 +350,9 @@ struct kcopyd_job {
unsigned long write_err;
/*
- * Either READ or WRITE
+ * REQ_OP_READ, REQ_OP_WRITE or REQ_OP_WRITE_ZEROES.
*/
- int rw;
+ enum req_op op;
struct dm_io_region source;
/*
@@ -418,7 +418,8 @@ static struct kcopyd_job *pop_io_job(struct list_head *jobs,
* constraint and sequential writes that are at the right position.
*/
list_for_each_entry(job, jobs, list) {
- if (job->rw == READ || !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
+ if (job->op == REQ_OP_READ ||
+ !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
list_del(&job->list);
return job;
}
@@ -518,7 +519,7 @@ static void complete_io(unsigned long error, void *context)
io_job_finish(kc->throttle);
if (error) {
- if (op_is_write(job->rw))
+ if (op_is_write(job->op))
job->write_err |= error;
else
job->read_err = 1;
@@ -530,11 +531,11 @@ static void complete_io(unsigned long error, void *context)
}
}
- if (op_is_write(job->rw))
+ if (op_is_write(job->op))
push(&kc->complete_jobs, job);
else {
- job->rw = WRITE;
+ job->op = REQ_OP_WRITE;
push(&kc->io_jobs, job);
}
@@ -549,8 +550,7 @@ static int run_io_job(struct kcopyd_job *job)
{
int r;
struct dm_io_request io_req = {
- .bi_op = job->rw,
- .bi_op_flags = 0,
+ .bi_opf = job->op,
.mem.type = DM_IO_PAGE_LIST,
.mem.ptr.pl = job->pages,
.mem.offset = 0,
@@ -571,7 +571,7 @@ static int run_io_job(struct kcopyd_job *job)
io_job_start(job->kc->throttle);
- if (job->rw == READ)
+ if (job->op == REQ_OP_READ)
r = dm_io(&io_req, 1, &job->source, NULL);
else
r = dm_io(&io_req, job->num_dests, job->dests, NULL);
@@ -614,7 +614,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
if (r < 0) {
/* error this rogue job */
- if (op_is_write(job->rw))
+ if (op_is_write(job->op))
job->write_err = (unsigned long) -1L;
else
job->read_err = 1;
@@ -817,7 +817,7 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
if (from) {
job->source = *from;
job->pages = NULL;
- job->rw = READ;
+ job->op = REQ_OP_READ;
} else {
memset(&job->source, 0, sizeof job->source);
job->source.count = job->dests[0].count;
@@ -826,10 +826,10 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
/*
* Use WRITE ZEROES to optimize zeroing if all dests support it.
*/
- job->rw = REQ_OP_WRITE_ZEROES;
+ job->op = REQ_OP_WRITE_ZEROES;
for (i = 0; i < job->num_dests; i++)
if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
- job->rw = WRITE;
+ job->op = REQ_OP_WRITE;
break;
}
}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 06f328928a7f..cf10fa667797 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -291,10 +291,9 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis
core->nr_regions = le64_to_cpu(disk->nr_regions);
}
-static int rw_header(struct log_c *lc, int op)
+static int rw_header(struct log_c *lc, enum req_op op)
{
- lc->io_req.bi_op = op;
- lc->io_req.bi_op_flags = 0;
+ lc->io_req.bi_opf = op;
return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
}
@@ -307,8 +306,7 @@ static int flush_header(struct log_c *lc)
.count = 0,
};
- lc->io_req.bi_op = REQ_OP_WRITE;
- lc->io_req.bi_op_flags = REQ_PREFLUSH;
+ lc->io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
return dm_io(&lc->io_req, 1, &null_location, NULL);
}
@@ -415,8 +413,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
/*
* Work out how many "unsigned long"s we need to hold the bitset.
*/
- bitset_size = dm_round_up(region_count,
- sizeof(*lc->clean_bits) << BYTE_SHIFT);
+ bitset_size = dm_round_up(region_count, BITS_PER_LONG);
bitset_size >>= BYTE_SHIFT;
lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);
@@ -616,7 +613,7 @@ static int disk_resume(struct dm_dirty_log *log)
log_clear_bit(lc, lc->clean_bits, i);
/* clear any old bits -- device has shrunk */
- for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
+ for (i = lc->region_count; i % BITS_PER_LONG; i++)
log_clear_bit(lc, lc->clean_bits, i);
/* copy clean across to sync */
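
The bitset sizing change above rounds the region bitmap up to BITS_PER_LONG because the bitmap helpers operate on unsigned longs; rounding only to the 32-bit word size could leave the allocation short of a whole number of longs on 64-bit kernels. A quick worked example, assuming BYTE_SHIFT is 3 and clean_bits is a uint32_t array (both inferred from the surrounding code, not shown in this excerpt):

    /*
     * region_count = 1000, 64-bit BITS_PER_LONG:
     *   dm_round_up(1000, BITS_PER_LONG)  -> 1024 bits
     *   1024 >> BYTE_SHIFT                ->  128 bytes
     *   128 / sizeof(*lc->clean_bits)     ->   32 (lc->bitset_uint32_count)
     */
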
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5e41fbae3f6b..1ec17c32867f 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1001,12 +1001,13 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
static int validate_raid_redundancy(struct raid_set *rs)
{
unsigned int i, rebuild_cnt = 0;
- unsigned int rebuilds_per_group = 0, copies;
+ unsigned int rebuilds_per_group = 0, copies, raid_disks;
unsigned int group_size, last_group_start;
- for (i = 0; i < rs->md.raid_disks; i++)
- if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
- !rs->dev[i].rdev.sb_page)
+ for (i = 0; i < rs->raid_disks; i++)
+ if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
+ ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
+ !rs->dev[i].rdev.sb_page)))
rebuild_cnt++;
switch (rs->md.level) {
@@ -1046,8 +1047,9 @@ static int validate_raid_redundancy(struct raid_set *rs)
* A A B B C
* C D D E E
*/
+ raid_disks = min(rs->raid_disks, rs->md.raid_disks);
if (__is_raid10_near(rs->md.new_layout)) {
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < raid_disks; i++) {
if (!(i % copies))
rebuilds_per_group = 0;
if ((!rs->dev[i].rdev.sb_page ||
@@ -1070,10 +1072,10 @@ static int validate_raid_redundancy(struct raid_set *rs)
* results in the need to treat the last (potentially larger)
* set differently.
*/
- group_size = (rs->md.raid_disks / copies);
- last_group_start = (rs->md.raid_disks / group_size) - 1;
+ group_size = (raid_disks / copies);
+ last_group_start = (raid_disks / group_size) - 1;
last_group_start *= group_size;
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < raid_disks; i++) {
if (!(i % copies) && !(i > last_group_start))
rebuilds_per_group = 0;
if ((!rs->dev[i].rdev.sb_page ||
@@ -1367,7 +1369,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
}
rs->md.bitmap_info.daemon_sleep = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) {
- /* Userspace passes new data_offset after having extended the the data image LV */
+ /* Userspace passes new data_offset after having extended the data image LV */
if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) {
rs->ti->error = "Only one data_offset argument pair allowed";
return -EINVAL;
@@ -1588,7 +1590,7 @@ static sector_t __rdev_sectors(struct raid_set *rs)
{
int i;
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
struct md_rdev *rdev = &rs->dev[i].rdev;
if (!test_bit(Journal, &rdev->flags) &&
@@ -2036,7 +2038,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload)
rdev->sb_loaded = 0;
- if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) {
+ if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true)) {
DMERR("Failed to read superblock of device at position %d",
rdev->raid_disk);
md_error(rdev->mddev, rdev);
@@ -3095,6 +3097,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
ti->num_flush_bios = 1;
+ ti->needs_bio_set_dev = true;
/* Restore any requested new layout for conversion decision */
rs_config_restore(rs, &rs_layout);
@@ -3507,7 +3510,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
- struct r5conf *conf = mddev->private;
+ struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
unsigned long recovery;
unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
@@ -3725,7 +3728,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, false);
+ md_reap_sync_thread(mddev);
}
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
return -EBUSY;
@@ -3766,13 +3769,13 @@ static int raid_iterate_devices(struct dm_target *ti,
unsigned int i;
int r = 0;
- for (i = 0; !r && i < rs->md.raid_disks; i++)
- if (rs->dev[i].data_dev)
- r = fn(ti,
- rs->dev[i].data_dev,
- 0, /* No offset on data devs */
- rs->md.dev_sectors,
- data);
+ for (i = 0; !r && i < rs->raid_disks; i++) {
+ if (rs->dev[i].data_dev) {
+ r = fn(ti, rs->dev[i].data_dev,
+ 0, /* No offset on data devs */
+ rs->md.dev_sectors, data);
+ }
+ }
return r;
}
@@ -3817,7 +3820,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
- for (i = 0; i < mddev->raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
r = &rs->dev[i].rdev;
/* HM FIXME: enhance journal device recovery processing */
if (test_bit(Journal, &r->flags))
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 8811d484fdd1..06a38dc32025 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -260,8 +260,7 @@ static int mirror_flush(struct dm_target *ti)
struct dm_io_region io[MAX_NR_MIRRORS];
struct mirror *m;
struct dm_io_request io_req = {
- .bi_op = REQ_OP_WRITE,
- .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+ .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = ms->io_client,
@@ -535,8 +534,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio)
{
struct dm_io_region io;
struct dm_io_request io_req = {
- .bi_op = REQ_OP_READ,
- .bi_op_flags = 0,
+ .bi_opf = REQ_OP_READ,
.mem.type = DM_IO_BIO,
.mem.ptr.bio = bio,
.notify.fn = read_callback,
@@ -648,9 +646,9 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
unsigned int i;
struct dm_io_region io[MAX_NR_MIRRORS], *dest = io;
struct mirror *m;
+ blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH);
struct dm_io_request io_req = {
- .bi_op = REQ_OP_WRITE,
- .bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH),
+ .bi_opf = REQ_OP_WRITE | op_flags,
.mem.type = DM_IO_BIO,
.mem.ptr.bio = bio,
.notify.fn = write_callback,
@@ -659,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
};
if (bio_op(bio) == REQ_OP_DISCARD) {
- io_req.bi_op = REQ_OP_DISCARD;
+ io_req.bi_opf = REQ_OP_DISCARD | op_flags;
io_req.mem.type = DM_IO_KMEM;
io_req.mem.ptr.addr = NULL;
}
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a83b98a8d2a9..4f49bbcce4f1 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -43,7 +43,6 @@ unsigned dm_get_reserved_rq_based_ios(void)
return __dm_get_module_param(&reserved_rq_based_ios,
RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
-EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 3bb5cff5d6fc..f46f930eedf9 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -226,8 +226,8 @@ static void do_metadata(struct work_struct *work)
/*
* Read or write a chunk aligned and sized block of data from a device.
*/
-static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
- int op_flags, int metadata)
+static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf,
+ int metadata)
{
struct dm_io_region where = {
.bdev = dm_snap_cow(ps->store->snap)->bdev,
@@ -235,8 +235,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
- .bi_op = op,
- .bi_op_flags = op_flags,
+ .bi_opf = opf,
.mem.type = DM_IO_VMA,
.mem.ptr.vma = area,
.client = ps->io_client,
@@ -282,11 +281,11 @@ static void skip_metadata(struct pstore *ps)
* Read or write a metadata area. Remembering to skip the first
* chunk which holds the header.
*/
-static int area_io(struct pstore *ps, int op, int op_flags)
+static int area_io(struct pstore *ps, blk_opf_t opf)
{
chunk_t chunk = area_location(ps, ps->current_area);
- return chunk_io(ps, ps->area, chunk, op, op_flags, 0);
+ return chunk_io(ps, ps->area, chunk, opf, 0);
}
static void zero_memory_area(struct pstore *ps)
@@ -297,7 +296,7 @@ static void zero_memory_area(struct pstore *ps)
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
return chunk_io(ps, ps->zero_area, area_location(ps, area),
- REQ_OP_WRITE, 0, 0);
+ REQ_OP_WRITE, 0);
}
static int read_header(struct pstore *ps, int *new_snapshot)
@@ -329,7 +328,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
if (r)
return r;
- r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1);
+ r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 1);
if (r)
goto bad;
@@ -390,7 +389,7 @@ static int write_header(struct pstore *ps)
dh->version = cpu_to_le32(ps->version);
dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
- return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1);
+ return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 1);
}
/*
@@ -734,8 +733,8 @@ static void persistent_commit_exception(struct dm_exception_store *store,
/*
* Commit exceptions to disk.
*/
- if (ps->valid && area_io(ps, REQ_OP_WRITE,
- REQ_PREFLUSH | REQ_FUA | REQ_SYNC))
+ if (ps->valid && area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA |
+ REQ_SYNC))
ps->valid = 0;
/*
@@ -775,7 +774,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
return 0;
ps->current_area--;
- r = area_io(ps, REQ_OP_READ, 0);
+ r = area_io(ps, REQ_OP_READ);
if (r < 0)
return r;
ps->current_committed = ps->exceptions_per_area;
@@ -812,7 +811,7 @@ static int persistent_commit_merge(struct dm_exception_store *store,
for (i = 0; i < nr_merged; i++)
clear_exception(ps, ps->current_committed - 1 - i);
- r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA);
+ r = area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA);
if (r < 0)
return r;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 0d336b5ec571..d1c2f84d27e3 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2026,7 +2026,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
/*
* Write to snapshot - higher level takes care of RW/RO
* flags so we should only get this if we are
- * writeable.
+ * writable.
*/
if (bio_data_dir(bio) == WRITE) {
pe = __lookup_pending_exception(s, chunk);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bd539afbfe88..332f96b58252 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -6,6 +6,7 @@
*/
#include "dm-core.h"
+#include "dm-rq.h"
#include <linux/module.h>
#include <linux/vmalloc.h>
@@ -174,8 +175,6 @@ static void dm_table_destroy_crypto_profile(struct dm_table *t);
void dm_table_destroy(struct dm_table *t)
{
- unsigned int i;
-
if (!t)
return;
@@ -184,13 +183,13 @@ void dm_table_destroy(struct dm_table *t)
kvfree(t->index[t->depth - 2]);
/* free the targets */
- for (i = 0; i < t->num_targets; i++) {
- struct dm_target *tgt = t->targets + i;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- if (tgt->type->dtr)
- tgt->type->dtr(tgt);
+ if (ti->type->dtr)
+ ti->type->dtr(ti);
- dm_put_target_type(tgt->type);
+ dm_put_target_type(ti->type);
}
kvfree(t->highs);
@@ -450,14 +449,14 @@ EXPORT_SYMBOL(dm_put_device);
/*
* Checks to see if the target joins onto the end of the table.
*/
-static int adjoin(struct dm_table *table, struct dm_target *ti)
+static int adjoin(struct dm_table *t, struct dm_target *ti)
{
struct dm_target *prev;
- if (!table->num_targets)
+ if (!t->num_targets)
return !ti->begin;
- prev = &table->targets[table->num_targets - 1];
+ prev = &t->targets[t->num_targets - 1];
return (ti->begin == (prev->begin + prev->len));
}
@@ -564,8 +563,8 @@ int dm_split_args(int *argc, char ***argvp, char *input)
* two or more targets, the size of each piece it gets split into must
* be compatible with the logical_block_size of the target processing it.
*/
-static int validate_hardware_logical_block_alignment(struct dm_table *table,
- struct queue_limits *limits)
+static int validate_hardware_logical_block_alignment(struct dm_table *t,
+ struct queue_limits *limits)
{
/*
* This function uses arithmetic modulo the logical_block_size
@@ -587,13 +586,13 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
struct dm_target *ti;
struct queue_limits ti_limits;
- unsigned i;
+ unsigned int i;
/*
* Check each entry in the table in turn.
*/
- for (i = 0; i < dm_table_get_num_targets(table); i++) {
- ti = dm_table_get_target(table, i);
+ for (i = 0; i < t->num_targets; i++) {
+ ti = dm_table_get_target(t, i);
blk_set_stacking_limits(&ti_limits);
@@ -621,7 +620,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
if (remaining) {
DMWARN("%s: table line %u (start sect %llu len %llu) "
"not aligned to h/w logical block size %u",
- dm_device_name(table->md), i,
+ dm_device_name(t->md), i,
(unsigned long long) ti->begin,
(unsigned long long) ti->len,
limits->logical_block_size);
@@ -636,7 +635,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
{
int r = -EINVAL, argc;
char **argv;
- struct dm_target *tgt;
+ struct dm_target *ti;
if (t->singleton) {
DMERR("%s: target type %s must appear alone in table",
@@ -646,87 +645,87 @@ int dm_table_add_target(struct dm_table *t, const char *type,
BUG_ON(t->num_targets >= t->num_allocated);
- tgt = t->targets + t->num_targets;
- memset(tgt, 0, sizeof(*tgt));
+ ti = t->targets + t->num_targets;
+ memset(ti, 0, sizeof(*ti));
if (!len) {
DMERR("%s: zero-length target", dm_device_name(t->md));
return -EINVAL;
}
- tgt->type = dm_get_target_type(type);
- if (!tgt->type) {
+ ti->type = dm_get_target_type(type);
+ if (!ti->type) {
DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
return -EINVAL;
}
- if (dm_target_needs_singleton(tgt->type)) {
+ if (dm_target_needs_singleton(ti->type)) {
if (t->num_targets) {
- tgt->error = "singleton target type must appear alone in table";
+ ti->error = "singleton target type must appear alone in table";
goto bad;
}
t->singleton = true;
}
- if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
- tgt->error = "target type may not be included in a read-only table";
+ if (dm_target_always_writeable(ti->type) && !(t->mode & FMODE_WRITE)) {
+ ti->error = "target type may not be included in a read-only table";
goto bad;
}
if (t->immutable_target_type) {
- if (t->immutable_target_type != tgt->type) {
- tgt->error = "immutable target type cannot be mixed with other target types";
+ if (t->immutable_target_type != ti->type) {
+ ti->error = "immutable target type cannot be mixed with other target types";
goto bad;
}
- } else if (dm_target_is_immutable(tgt->type)) {
+ } else if (dm_target_is_immutable(ti->type)) {
if (t->num_targets) {
- tgt->error = "immutable target type cannot be mixed with other target types";
+ ti->error = "immutable target type cannot be mixed with other target types";
goto bad;
}
- t->immutable_target_type = tgt->type;
+ t->immutable_target_type = ti->type;
}
- if (dm_target_has_integrity(tgt->type))
+ if (dm_target_has_integrity(ti->type))
t->integrity_added = 1;
- tgt->table = t;
- tgt->begin = start;
- tgt->len = len;
- tgt->error = "Unknown error";
+ ti->table = t;
+ ti->begin = start;
+ ti->len = len;
+ ti->error = "Unknown error";
/*
* Does this target adjoin the previous one ?
*/
- if (!adjoin(t, tgt)) {
- tgt->error = "Gap in table";
+ if (!adjoin(t, ti)) {
+ ti->error = "Gap in table";
goto bad;
}
r = dm_split_args(&argc, &argv, params);
if (r) {
- tgt->error = "couldn't split parameters";
+ ti->error = "couldn't split parameters";
goto bad;
}
- r = tgt->type->ctr(tgt, argc, argv);
+ r = ti->type->ctr(ti, argc, argv);
kfree(argv);
if (r)
goto bad;
- t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
+ t->highs[t->num_targets++] = ti->begin + ti->len - 1;
- if (!tgt->num_discard_bios && tgt->discards_supported)
+ if (!ti->num_discard_bios && ti->discards_supported)
DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
dm_device_name(t->md), type);
- if (tgt->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
+ if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
static_branch_enable(&swap_bios_enabled);
return 0;
bad:
- DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, tgt->error, ERR_PTR(r));
- dm_put_target_type(tgt->type);
+ DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r));
+ dm_put_target_type(ti->type);
return r;
}
@@ -825,14 +824,11 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de
}
static bool dm_table_supports_dax(struct dm_table *t,
- iterate_devices_callout_fn iterate_fn)
+ iterate_devices_callout_fn iterate_fn)
{
- struct dm_target *ti;
- unsigned i;
-
/* Ensure that all targets support DAX. */
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->direct_access)
return false;
@@ -860,9 +856,8 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
static int dm_table_determine_type(struct dm_table *t)
{
- unsigned i;
unsigned bio_based = 0, request_based = 0, hybrid = 0;
- struct dm_target *tgt;
+ struct dm_target *ti;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
@@ -876,11 +871,11 @@ static int dm_table_determine_type(struct dm_table *t)
goto verify_rq_based;
}
- for (i = 0; i < t->num_targets; i++) {
- tgt = t->targets + i;
- if (dm_target_hybrid(tgt))
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ ti = dm_table_get_target(t, i);
+ if (dm_target_hybrid(ti))
hybrid = 1;
- else if (dm_target_request_based(tgt))
+ else if (dm_target_request_based(ti))
request_based = 1;
else
bio_based = 1;
@@ -942,18 +937,18 @@ verify_rq_based:
return 0;
}
- tgt = dm_table_get_immutable_target(t);
- if (!tgt) {
+ ti = dm_table_get_immutable_target(t);
+ if (!ti) {
DMERR("table load rejected: immutable target is required");
return -EINVAL;
- } else if (tgt->max_io_len) {
+ } else if (ti->max_io_len) {
DMERR("table load rejected: immutable target that splits IO is not supported");
return -EINVAL;
}
/* Non-request-stackable devices can't be used for request-based dm */
- if (!tgt->type->iterate_devices ||
- !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
+ if (!ti->type->iterate_devices ||
+ !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
@@ -983,11 +978,9 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
if (dm_target_is_wildcard(ti->type))
return ti;
}
@@ -1010,32 +1003,56 @@ static bool dm_table_supports_poll(struct dm_table *t);
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
- unsigned per_io_data_size = 0;
- unsigned min_pool_size = 0;
- struct dm_target *ti;
- unsigned i;
- bool poll_supported = false;
+ unsigned int per_io_data_size = 0, front_pad, io_front_pad;
+ unsigned int min_pool_size = 0, pool_size;
+ struct dm_md_mempools *pools;
if (unlikely(type == DM_TYPE_NONE)) {
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}
- if (__table_type_bio_based(type)) {
- for (i = 0; i < t->num_targets; i++) {
- ti = t->targets + i;
- per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
- min_pool_size = max(min_pool_size, ti->num_flush_bios);
- }
- poll_supported = dm_table_supports_poll(t);
+ pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
+ if (!pools)
+ return -ENOMEM;
+
+ if (type == DM_TYPE_REQUEST_BASED) {
+ pool_size = dm_get_reserved_rq_based_ios();
+ front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
+ goto init_bs;
}
- t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size,
- t->integrity_supported, poll_supported);
- if (!t->mempools)
- return -ENOMEM;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+ per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
+ min_pool_size = max(min_pool_size, ti->num_flush_bios);
+ }
+ pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
+ front_pad = roundup(per_io_data_size,
+ __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
+
+ io_front_pad = roundup(per_io_data_size,
+ __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
+ if (bioset_init(&pools->io_bs, pool_size, io_front_pad,
+ dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0))
+ goto out_free_pools;
+ if (t->integrity_supported &&
+ bioset_integrity_create(&pools->io_bs, pool_size))
+ goto out_free_pools;
+init_bs:
+ if (bioset_init(&pools->bs, pool_size, front_pad, 0))
+ goto out_free_pools;
+ if (t->integrity_supported &&
+ bioset_integrity_create(&pools->bs, pool_size))
+ goto out_free_pools;
+
+ t->mempools = pools;
return 0;
+
+out_free_pools:
+ dm_free_md_mempools(pools);
+ return -ENOMEM;
}
static int setup_indexes(struct dm_table *t)
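(Editor's note) The rewritten dm_table_alloc_md_mempools() above relies on DM_TARGET_IO_BIO_OFFSET and DM_IO_BIO_OFFSET, which this diff removes from dm.c further down; they are presumably relocated to a shared header such as dm-core.h. A minimal sketch of those definitions, copied from the dm.c hunk removed below:

	#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
	#define DM_IO_BIO_OFFSET \
		(offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))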
@@ -1100,10 +1117,10 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t)
struct list_head *devices = dm_table_get_devices(t);
struct dm_dev_internal *dd = NULL;
struct gendisk *prev_disk = NULL, *template_disk = NULL;
- unsigned i;
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
+
if (!dm_target_passes_integrity(ti->type))
goto no_integrity;
}
@@ -1217,18 +1234,19 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile,
struct dm_keyslot_evict_args args = { key };
struct dm_table *t;
int srcu_idx;
- int i;
- struct dm_target *ti;
t = dm_get_live_table(md, &srcu_idx);
if (!t)
return 0;
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
if (!ti->type->iterate_devices)
continue;
ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
}
+
dm_put_live_table(md, srcu_idx);
return args.err;
}
@@ -1277,7 +1295,6 @@ static int dm_table_construct_crypto_profile(struct dm_table *t)
{
struct dm_crypto_profile *dmcp;
struct blk_crypto_profile *profile;
- struct dm_target *ti;
unsigned int i;
bool empty_profile = true;
@@ -1293,8 +1310,8 @@ static int dm_table_construct_crypto_profile(struct dm_table *t)
memset(profile->modes_supported, 0xFF,
sizeof(profile->modes_supported));
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!dm_target_passes_crypto(ti->type)) {
blk_crypto_intersect_capabilities(profile, NULL);
@@ -1444,14 +1461,6 @@ inline sector_t dm_table_get_size(struct dm_table *t)
}
EXPORT_SYMBOL(dm_table_get_size);
-struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
-{
- if (index >= t->num_targets)
- return NULL;
-
- return t->targets + index;
-}
-
/*
* Search the btree for the correct target.
*
@@ -1512,11 +1521,8 @@ static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_any_dev_attr(struct dm_table *t,
iterate_devices_callout_fn func, void *data)
{
- struct dm_target *ti;
- unsigned int i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (ti->type->iterate_devices &&
ti->type->iterate_devices(ti, func, data))
@@ -1538,11 +1544,8 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_poll(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i = 0;
-
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_poll_capable, NULL))
@@ -1558,18 +1561,15 @@ static bool dm_table_supports_poll(struct dm_table *t)
* Returns false if the result is unknown because a target doesn't
* support iterate_devices.
*/
-bool dm_table_has_no_data_devices(struct dm_table *table)
+bool dm_table_has_no_data_devices(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i, num_devices;
-
- for (i = 0; i < dm_table_get_num_targets(table); i++) {
- ti = dm_table_get_target(table, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+ unsigned int num_devices = 0;
if (!ti->type->iterate_devices)
return false;
- num_devices = 0;
ti->type->iterate_devices(ti, count_device, &num_devices);
if (num_devices)
return false;
@@ -1597,11 +1597,8 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_zoned_model(struct dm_table *t,
enum blk_zoned_model zoned_model)
{
- struct dm_target *ti;
- unsigned i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (dm_target_supports_zoned_hm(ti->type)) {
if (!ti->type->iterate_devices ||
@@ -1620,13 +1617,11 @@ static bool dm_table_supports_zoned_model(struct dm_table *t,
static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
- struct request_queue *q = bdev_get_queue(dev->bdev);
unsigned int *zone_sectors = data;
- if (!blk_queue_is_zoned(q))
+ if (!bdev_is_zoned(dev->bdev))
return 0;
-
- return blk_queue_zone_sectors(q) != *zone_sectors;
+ return bdev_zone_sectors(dev->bdev) != *zone_sectors;
}
/*
@@ -1634,16 +1629,16 @@ static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *
* zone sectors, if the destination device is a zoned block device, it shall
* have the specified zone_sectors.
*/
-static int validate_hardware_zoned_model(struct dm_table *table,
+static int validate_hardware_zoned_model(struct dm_table *t,
enum blk_zoned_model zoned_model,
unsigned int zone_sectors)
{
if (zoned_model == BLK_ZONED_NONE)
return 0;
- if (!dm_table_supports_zoned_model(table, zoned_model)) {
+ if (!dm_table_supports_zoned_model(t, zoned_model)) {
DMERR("%s: zoned model is not consistent across all devices",
- dm_device_name(table->md));
+ dm_device_name(t->md));
return -EINVAL;
}
@@ -1651,9 +1646,9 @@ static int validate_hardware_zoned_model(struct dm_table *table,
if (!zone_sectors || !is_power_of_2(zone_sectors))
return -EINVAL;
- if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
+ if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) {
DMERR("%s: zone sectors is not consistent across all zoned devices",
- dm_device_name(table->md));
+ dm_device_name(t->md));
return -EINVAL;
}
@@ -1663,21 +1658,19 @@ static int validate_hardware_zoned_model(struct dm_table *table,
/*
* Establish the new table's queue_limits and validate them.
*/
-int dm_calculate_queue_limits(struct dm_table *table,
+int dm_calculate_queue_limits(struct dm_table *t,
struct queue_limits *limits)
{
- struct dm_target *ti;
struct queue_limits ti_limits;
- unsigned i;
enum blk_zoned_model zoned_model = BLK_ZONED_NONE;
unsigned int zone_sectors = 0;
blk_set_stacking_limits(limits);
- for (i = 0; i < dm_table_get_num_targets(table); i++) {
- blk_set_stacking_limits(&ti_limits);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- ti = dm_table_get_target(table, i);
+ blk_set_stacking_limits(&ti_limits);
if (!ti->type->iterate_devices)
goto combine_limits;
@@ -1718,7 +1711,7 @@ combine_limits:
DMWARN("%s: adding target device "
"(start sect %llu len %llu) "
"caused an alignment inconsistency",
- dm_device_name(table->md),
+ dm_device_name(t->md),
(unsigned long long) ti->begin,
(unsigned long long) ti->len);
}
@@ -1738,10 +1731,10 @@ combine_limits:
zoned_model = limits->zoned;
zone_sectors = limits->chunk_sectors;
}
- if (validate_hardware_zoned_model(table, zoned_model, zone_sectors))
+ if (validate_hardware_zoned_model(t, zoned_model, zone_sectors))
return -EINVAL;
- return validate_hardware_logical_block_alignment(table, limits);
+ return validate_hardware_logical_block_alignment(t, limits);
}
/*
@@ -1785,17 +1778,14 @@ static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
{
- struct dm_target *ti;
- unsigned i;
-
/*
* Require at least one underlying device to support flushes.
* t->devices includes internal dm devices such as mirror logs
* so we need to use iterate_devices here, which targets
* supporting flushes must provide.
*/
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_flush_bios)
continue;
@@ -1849,11 +1839,8 @@ static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *
static bool dm_table_supports_write_zeroes(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i = 0;
-
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_write_zeroes_bios)
return false;
@@ -1876,11 +1863,8 @@ static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_nowait(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i = 0;
-
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!dm_target_supports_nowait(ti->type))
return false;
@@ -1901,11 +1885,8 @@ static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_discards(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_discard_bios)
return false;
@@ -1933,11 +1914,8 @@ static int device_not_secure_erase_capable(struct dm_target *ti,
static bool dm_table_supports_secure_erase(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned int i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_secure_erase_bios)
return false;
@@ -2067,11 +2045,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
return 0;
}
-unsigned int dm_table_get_num_targets(struct dm_table *t)
-{
- return t->num_targets;
-}
-
struct list_head *dm_table_get_devices(struct dm_table *t)
{
return &t->devices;
@@ -2091,12 +2064,11 @@ enum suspend_mode {
static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
{
- int i = t->num_targets;
- struct dm_target *ti = t->targets;
-
lockdep_assert_held(&t->md->suspend_lock);
- while (i--) {
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
switch (mode) {
case PRESUSPEND:
if (ti->type->presuspend)
@@ -2111,7 +2083,6 @@ static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
ti->type->postsuspend(ti);
break;
}
- ti++;
}
}
@@ -2141,12 +2112,13 @@ void dm_table_postsuspend_targets(struct dm_table *t)
int dm_table_resume_targets(struct dm_table *t)
{
- int i, r = 0;
+ unsigned int i;
+ int r = 0;
lockdep_assert_held(&t->md->suspend_lock);
for (i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = t->targets + i;
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->preresume)
continue;
@@ -2160,7 +2132,7 @@ int dm_table_resume_targets(struct dm_table *t)
}
for (i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = t->targets + i;
+ struct dm_target *ti = dm_table_get_target(t, i);
if (ti->type->resume)
ti->type->resume(ti);
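(Editor's note) With dm_table_get_num_targets() and the bounds-checking out-of-line dm_table_get_target() removed above, callers now loop over t->num_targets directly and index via dm_table_get_target(). A minimal sketch of the presumed inline replacement (assumed to live in dm-core.h; not part of these hunks):

	static inline struct dm_target *dm_table_get_target(struct dm_table *t,
							    unsigned int index)
	{
		BUG_ON(index >= t->num_targets);
		return t->targets + index;
	}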
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 2db7030aba00..a27395c8621f 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
dm_sm_threshold_fn fn,
void *context)
{
- int r;
+ int r = -EINVAL;
pmd_write_lock_in_core(pmd);
- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
+ if (!pmd->fail_io) {
+ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
+ threshold, fn, context);
+ }
pmd_write_unlock(pmd);
return r;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 84c083f76673..e76c96c760a9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
calc_metadata_threshold(pt),
metadata_low_callback,
pool);
- if (r)
+ if (r) {
+ ti->error = "Error registering metadata threshold";
goto out_flags_changed;
+ }
dm_pool_register_pre_commit_callback(pool->pmd,
metadata_pre_commit_callback, pool);
diff --git a/drivers/md/dm-verity-loadpin.c b/drivers/md/dm-verity-loadpin.c
new file mode 100644
index 000000000000..387ec43aef72
--- /dev/null
+++ b/drivers/md/dm-verity-loadpin.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/dm-verity-loadpin.h>
+
+#include "dm.h"
+#include "dm-core.h"
+#include "dm-verity.h"
+
+#define DM_MSG_PREFIX "verity-loadpin"
+
+LIST_HEAD(dm_verity_loadpin_trusted_root_digests);
+
+static bool is_trusted_verity_target(struct dm_target *ti)
+{
+ u8 *root_digest;
+ unsigned int digest_size;
+ struct dm_verity_loadpin_trusted_root_digest *trd;
+ bool trusted = false;
+
+ if (!dm_is_verity_target(ti))
+ return false;
+
+ if (dm_verity_get_root_digest(ti, &root_digest, &digest_size))
+ return false;
+
+ list_for_each_entry(trd, &dm_verity_loadpin_trusted_root_digests, node) {
+ if ((trd->len == digest_size) &&
+ !memcmp(trd->data, root_digest, digest_size)) {
+ trusted = true;
+ break;
+ }
+ }
+
+ kfree(root_digest);
+
+ return trusted;
+}
+
+/*
+ * Determines whether the file system of a superblock is located on
+ * a verity device that is trusted by LoadPin.
+ */
+bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev)
+{
+ struct mapped_device *md;
+ struct dm_table *table;
+ struct dm_target *ti;
+ int srcu_idx;
+ bool trusted = false;
+
+ if (list_empty(&dm_verity_loadpin_trusted_root_digests))
+ return false;
+
+ md = dm_get_md(bdev->bd_dev);
+ if (!md)
+ return false;
+
+ table = dm_get_live_table(md, &srcu_idx);
+
+ if (table->num_targets != 1)
+ goto out;
+
+ ti = dm_table_get_target(table, 0);
+
+ if (is_trusted_verity_target(ti))
+ trusted = true;
+
+out:
+ dm_put_live_table(md, srcu_idx);
+ dm_put(md);
+
+ return trusted;
+}
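(Editor's note) The list entries consumed by is_trusted_verity_target() come from <linux/dm-verity-loadpin.h>, which is not part of this diff; a sketch of their expected shape, inferred from the trd->node/len/data accesses above:

	struct dm_verity_loadpin_trusted_root_digest {
		struct list_head node;	/* on dm_verity_loadpin_trusted_root_digests */
		unsigned int len;	/* digest length in bytes */
		u8 data[];		/* raw root digest */
	};

LoadPin is expected to populate dm_verity_loadpin_trusted_root_digests at boot (for example from a digest list given on the command line) and then call dm_verity_loadpin_is_bdev_trusted() on the backing block device of a file it is asked to pin.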
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index d6dbd47492a8..4fd853a56b1a 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/scatterlist.h>
+#include <linux/string.h>
#define DM_MSG_PREFIX "verity"
@@ -527,11 +528,10 @@ static int verity_verify_io(struct dm_verity_io *io)
if (v->validated_blocks)
set_bit(cur_block, v->validated_blocks);
continue;
- }
- else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
- cur_block, NULL, &start) == 0)
+ } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+ cur_block, NULL, &start) == 0) {
continue;
- else {
+ } else {
if (bio->bi_status) {
/*
* Error correction failed; Just return error
@@ -1310,10 +1310,40 @@ bad:
return r;
}
+/*
+ * Check whether a DM target is a verity target.
+ */
+bool dm_is_verity_target(struct dm_target *ti)
+{
+ return ti->type->module == THIS_MODULE;
+}
+
+/*
+ * Get the root digest of a verity target.
+ *
+ * Returns a copy of the root digest; the caller is responsible for
+ * freeing the memory of the digest.
+ */
+int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, unsigned int *digest_size)
+{
+ struct dm_verity *v = ti->private;
+
+ if (!dm_is_verity_target(ti))
+ return -EINVAL;
+
+ *root_digest = kmemdup(v->root_digest, v->digest_size, GFP_KERNEL);
+ if (*root_digest == NULL)
+ return -ENOMEM;
+
+ *digest_size = v->digest_size;
+
+ return 0;
+}
+
static struct target_type verity_target = {
.name = "verity",
.features = DM_TARGET_IMMUTABLE,
- .version = {1, 8, 0},
+ .version = {1, 8, 1},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 4e769d13473a..c832cc3e3d24 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -129,4 +129,8 @@ extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);
+extern bool dm_is_verity_target(struct dm_target *ti);
+extern int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest,
+ unsigned int *digest_size);
+
#endif /* DM_VERITY_H */
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index d74c5a7a0ab4..1fc161d65673 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -22,7 +22,7 @@
#define HIGH_WATERMARK 50
#define LOW_WATERMARK 45
-#define MAX_WRITEBACK_JOBS 0
+#define MAX_WRITEBACK_JOBS min(0x10000000 / PAGE_SIZE, totalram_pages() / 16)
#define ENDIO_LATENCY 16
#define WRITEBACK_LATENCY 64
#define AUTOCOMMIT_BLOCKS_SSD 65536
@@ -523,8 +523,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
region.sector += wc->start_sector;
atomic_inc(&endio.count);
- req.bi_op = REQ_OP_WRITE;
- req.bi_op_flags = REQ_SYNC;
+ req.bi_opf = REQ_OP_WRITE | REQ_SYNC;
req.mem.type = DM_IO_VMA;
req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY;
req.client = wc->dm_io;
@@ -562,8 +561,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
region.sector += wc->start_sector;
- req.bi_op = REQ_OP_WRITE;
- req.bi_op_flags = REQ_SYNC | REQ_FUA;
+ req.bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA;
req.mem.type = DM_IO_VMA;
req.mem.ptr.vma = (char *)wc->memory_map;
req.client = wc->dm_io;
@@ -592,8 +590,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
region.bdev = dev->bdev;
region.sector = 0;
region.count = 0;
- req.bi_op = REQ_OP_WRITE;
- req.bi_op_flags = REQ_PREFLUSH;
+ req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
req.mem.type = DM_IO_KMEM;
req.mem.ptr.addr = NULL;
req.client = wc->dm_io;
@@ -981,8 +978,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors
region.bdev = wc->ssd_dev->bdev;
region.sector = wc->start_sector;
region.count = n_sectors;
- req.bi_op = REQ_OP_READ;
- req.bi_op_flags = REQ_SYNC;
+ req.bi_opf = REQ_OP_READ | REQ_SYNC;
req.mem.type = DM_IO_VMA;
req.mem.ptr.vma = (char *)wc->memory_map;
req.client = wc->dm_io;
@@ -1329,8 +1325,8 @@ enum wc_map_op {
WC_MAP_ERROR,
};
-static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
- struct wc_entry *e)
+static void writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e)
{
if (e) {
sector_t next_boundary =
@@ -1338,8 +1334,6 @@ static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, stru
if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT)
dm_accept_partial_bio(bio, next_boundary);
}
-
- return WC_MAP_REMAP_ORIGIN;
}
static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio)
@@ -1366,14 +1360,16 @@ read_next_block:
map_op = WC_MAP_REMAP;
}
} else {
- map_op = writecache_map_remap_origin(wc, bio, e);
+ writecache_map_remap_origin(wc, bio, e);
+ wc->stats.reads += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
+ map_op = WC_MAP_REMAP_ORIGIN;
}
return map_op;
}
-static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
- struct wc_entry *e, bool search_used)
+static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e, bool search_used)
{
unsigned bio_size = wc->block_size;
sector_t start_cache_sec = cache_sector(wc, e);
@@ -1413,14 +1409,15 @@ static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct b
bio->bi_iter.bi_sector = start_cache_sec;
dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
+ wc->stats.writes_allocate += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
+
if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
wc->uncommitted_blocks = 0;
queue_work(wc->writeback_wq, &wc->flush_work);
} else {
writecache_schedule_autocommit(wc);
}
-
- return WC_MAP_REMAP;
}
static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio)
@@ -1430,9 +1427,10 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio
do {
bool found_entry = false;
bool search_used = false;
- wc->stats.writes++;
- if (writecache_has_error(wc))
+ if (writecache_has_error(wc)) {
+ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
return WC_MAP_ERROR;
+ }
e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
if (e) {
if (!writecache_entry_is_committed(wc, e)) {
@@ -1456,9 +1454,11 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio
if (unlikely(!e)) {
if (!WC_MODE_PMEM(wc) && !found_entry) {
direct_write:
- wc->stats.writes_around++;
e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
- return writecache_map_remap_origin(wc, bio, e);
+ writecache_map_remap_origin(wc, bio, e);
+ wc->stats.writes_around += bio->bi_iter.bi_size >> wc->block_size_bits;
+ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
+ return WC_MAP_REMAP_ORIGIN;
}
wc->stats.writes_blocked_on_freelist++;
writecache_wait_on_freelist(wc);
@@ -1469,10 +1469,13 @@ direct_write:
wc->uncommitted_blocks++;
wc->stats.writes_allocate++;
bio_copy:
- if (WC_MODE_PMEM(wc))
+ if (WC_MODE_PMEM(wc)) {
bio_copy_block(wc, bio, memory_data(wc, e));
- else
- return writecache_bio_copy_ssd(wc, bio, e, search_used);
+ wc->stats.writes++;
+ } else {
+ writecache_bio_copy_ssd(wc, bio, e, search_used);
+ return WC_MAP_REMAP;
+ }
} while (bio->bi_iter.bi_size);
if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks))
@@ -1507,7 +1510,7 @@ static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio
static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio)
{
- wc->stats.discards++;
+ wc->stats.discards += bio->bi_iter.bi_size >> wc->block_size_bits;
if (writecache_has_error(wc))
return WC_MAP_ERROR;
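(Editor's note) The statistics changes above switch the read/write/discard counters from one increment per bio to one increment per cache block: each path now adds bio->bi_iter.bi_size >> wc->block_size_bits. As a worked example, assuming the default 4 KiB block size (block_size_bits == 12), a 64 KiB discard now advances wc->stats.discards by 65536 >> 12 = 16 rather than by 1.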
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 3e7b1fe1580b..3dafc0e8b7a9 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -139,13 +139,11 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
void dm_cleanup_zoned_dev(struct mapped_device *md)
{
- struct request_queue *q = md->queue;
-
- if (q) {
- kfree(q->conv_zones_bitmap);
- q->conv_zones_bitmap = NULL;
- kfree(q->seq_zones_wlock);
- q->seq_zones_wlock = NULL;
+ if (md->disk) {
+ kfree(md->disk->conv_zones_bitmap);
+ md->disk->conv_zones_bitmap = NULL;
+ kfree(md->disk->seq_zones_wlock);
+ md->disk->seq_zones_wlock = NULL;
}
kvfree(md->zwp_offset);
@@ -179,31 +177,31 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
struct mapped_device *md = data;
- struct request_queue *q = md->queue;
+ struct gendisk *disk = md->disk;
switch (zone->type) {
case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!q->conv_zones_bitmap) {
- q->conv_zones_bitmap =
- kcalloc(BITS_TO_LONGS(q->nr_zones),
+ if (!disk->conv_zones_bitmap) {
+ disk->conv_zones_bitmap =
+ kcalloc(BITS_TO_LONGS(disk->nr_zones),
sizeof(unsigned long), GFP_NOIO);
- if (!q->conv_zones_bitmap)
+ if (!disk->conv_zones_bitmap)
return -ENOMEM;
}
- set_bit(idx, q->conv_zones_bitmap);
+ set_bit(idx, disk->conv_zones_bitmap);
break;
case BLK_ZONE_TYPE_SEQWRITE_REQ:
case BLK_ZONE_TYPE_SEQWRITE_PREF:
- if (!q->seq_zones_wlock) {
- q->seq_zones_wlock =
- kcalloc(BITS_TO_LONGS(q->nr_zones),
+ if (!disk->seq_zones_wlock) {
+ disk->seq_zones_wlock =
+ kcalloc(BITS_TO_LONGS(disk->nr_zones),
sizeof(unsigned long), GFP_NOIO);
- if (!q->seq_zones_wlock)
+ if (!disk->seq_zones_wlock)
return -ENOMEM;
}
if (!md->zwp_offset) {
md->zwp_offset =
- kvcalloc(q->nr_zones, sizeof(unsigned int),
+ kvcalloc(disk->nr_zones, sizeof(unsigned int),
GFP_KERNEL);
if (!md->zwp_offset)
return -ENOMEM;
@@ -228,7 +226,7 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
*/
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
- struct request_queue *q = md->queue;
+ struct gendisk *disk = md->disk;
unsigned int noio_flag;
int ret;
@@ -236,7 +234,7 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
* Check if something changed. If yes, cleanup the current resources
* and reallocate everything.
*/
- if (!q->nr_zones || q->nr_zones != md->nr_zones)
+ if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
dm_cleanup_zoned_dev(md);
if (md->nr_zones)
return 0;
@@ -246,17 +244,17 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
* operations in this context are done as if GFP_NOIO was specified.
*/
noio_flag = memalloc_noio_save();
- ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones,
+ ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
dm_zone_revalidate_cb, md);
memalloc_noio_restore(noio_flag);
if (ret < 0)
goto err;
- if (ret != q->nr_zones) {
+ if (ret != disk->nr_zones) {
ret = -EIO;
goto err;
}
- md->nr_zones = q->nr_zones;
+ md->nr_zones = disk->nr_zones;
return 0;
@@ -270,16 +268,13 @@ static int device_not_zone_append_capable(struct dm_target *ti,
struct dm_dev *dev, sector_t start,
sector_t len, void *data)
{
- return !blk_queue_is_zoned(bdev_get_queue(dev->bdev));
+ return !bdev_is_zoned(dev->bdev);
}
static bool dm_table_supports_zone_append(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned int i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (ti->emulate_zone_append)
return false;
@@ -301,7 +296,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
* correct value to be exposed in sysfs queue/nr_zones.
*/
WARN_ON_ONCE(queue_is_mq(q));
- q->nr_zones = blkdev_nr_zones(md->disk);
+ md->disk->nr_zones = bdev_nr_zones(md->disk->part0);
/* Check if zone append is natively supported */
if (dm_table_supports_zone_append(t)) {
@@ -334,7 +329,7 @@ static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
unsigned int *wp_ofst)
{
- sector_t sector = zno * blk_queue_zone_sectors(md->queue);
+ sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
unsigned int noio_flag;
struct dm_table *t;
int srcu_idx, ret;
@@ -361,7 +356,7 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
}
struct orig_bio_details {
- unsigned int op;
+ enum req_op op;
unsigned int nr_sectors;
};
@@ -373,7 +368,7 @@ struct orig_bio_details {
static bool dm_zone_map_bio_begin(struct mapped_device *md,
unsigned int zno, struct bio *clone)
{
- sector_t zsectors = blk_queue_zone_sectors(md->queue);
+ sector_t zsectors = bdev_zone_sectors(md->disk->part0);
unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
/*
@@ -443,7 +438,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int z
return BLK_STS_OK;
case REQ_OP_ZONE_FINISH:
WRITE_ONCE(md->zwp_offset[zno],
- blk_queue_zone_sectors(md->queue));
+ bdev_zone_sectors(md->disk->part0));
return BLK_STS_OK;
case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE:
@@ -466,26 +461,26 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int z
}
}
-static inline void dm_zone_lock(struct request_queue *q,
- unsigned int zno, struct bio *clone)
+static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
+ struct bio *clone)
{
if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
return;
- wait_on_bit_lock_io(q->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
}
-static inline void dm_zone_unlock(struct request_queue *q,
- unsigned int zno, struct bio *clone)
+static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
+ struct bio *clone)
{
if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
return;
- WARN_ON_ONCE(!test_bit(zno, q->seq_zones_wlock));
- clear_bit_unlock(zno, q->seq_zones_wlock);
+ WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
+ clear_bit_unlock(zno, disk->seq_zones_wlock);
smp_mb__after_atomic();
- wake_up_bit(q->seq_zones_wlock, zno);
+ wake_up_bit(disk->seq_zones_wlock, zno);
bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
}
@@ -520,7 +515,6 @@ int dm_zone_map_bio(struct dm_target_io *tio)
struct dm_io *io = tio->io;
struct dm_target *ti = tio->ti;
struct mapped_device *md = io->md;
- struct request_queue *q = md->queue;
struct bio *clone = &tio->clone;
struct orig_bio_details orig_bio_details;
unsigned int zno;
@@ -536,7 +530,7 @@ int dm_zone_map_bio(struct dm_target_io *tio)
/* Lock the target zone */
zno = bio_zone_no(clone);
- dm_zone_lock(q, zno, clone);
+ dm_zone_lock(md->disk, zno, clone);
orig_bio_details.nr_sectors = bio_sectors(clone);
orig_bio_details.op = bio_op(clone);
@@ -546,7 +540,7 @@ int dm_zone_map_bio(struct dm_target_io *tio)
* both valid, and if the bio is a zone append, remap it to a write.
*/
if (!dm_zone_map_bio_begin(md, zno, clone)) {
- dm_zone_unlock(q, zno, clone);
+ dm_zone_unlock(md->disk, zno, clone);
return DM_MAPIO_KILL;
}
@@ -570,12 +564,12 @@ int dm_zone_map_bio(struct dm_target_io *tio)
sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
*tio->len_ptr);
if (sts != BLK_STS_OK)
- dm_zone_unlock(q, zno, clone);
+ dm_zone_unlock(md->disk, zno, clone);
break;
case DM_MAPIO_REQUEUE:
case DM_MAPIO_KILL:
default:
- dm_zone_unlock(q, zno, clone);
+ dm_zone_unlock(md->disk, zno, clone);
sts = BLK_STS_IOERR;
break;
}
@@ -592,7 +586,7 @@ int dm_zone_map_bio(struct dm_target_io *tio)
void dm_zone_endio(struct dm_io *io, struct bio *clone)
{
struct mapped_device *md = io->md;
- struct request_queue *q = md->queue;
+ struct gendisk *disk = md->disk;
struct bio *orig_bio = io->orig_bio;
unsigned int zwp_offset;
unsigned int zno;
@@ -608,7 +602,8 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
*/
if (clone->bi_status == BLK_STS_OK &&
bio_op(clone) == REQ_OP_ZONE_APPEND) {
- sector_t mask = (sector_t)blk_queue_zone_sectors(q) - 1;
+ sector_t mask =
+ (sector_t)bdev_zone_sectors(disk->part0) - 1;
orig_bio->bi_iter.bi_sector +=
clone->bi_iter.bi_sector & mask;
@@ -649,5 +644,5 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
zwp_offset - bio_sectors(orig_bio);
}
- dm_zone_unlock(q, zno, clone);
+ dm_zone_unlock(disk, zno, clone);
}
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index d1ea66114d14..34db364c23a8 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -737,7 +737,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
/*
* Read/write a metadata block.
*/
-static int dmz_rdwr_block(struct dmz_dev *dev, int op,
+static int dmz_rdwr_block(struct dmz_dev *dev, enum req_op op,
sector_t block, struct page *page)
{
struct bio *bio;
@@ -2045,7 +2045,8 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
* allocated and used to map the chunk.
* The zone returned will be set to the active state.
*/
-struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk, int op)
+struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd,
+ unsigned int chunk, enum req_op op)
{
struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT];
struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data;
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 0ec5d8b9b1a4..95b132b52f33 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -764,8 +764,7 @@ static void dmz_put_zoned_device(struct dm_target *ti)
static int dmz_fixup_devices(struct dm_target *ti)
{
struct dmz_target *dmz = ti->private;
- struct dmz_dev *reg_dev, *zoned_dev;
- struct request_queue *q;
+ struct dmz_dev *reg_dev = NULL;
sector_t zone_nr_sectors = 0;
int i;
@@ -780,32 +779,32 @@ static int dmz_fixup_devices(struct dm_target *ti)
return -EINVAL;
}
for (i = 1; i < dmz->nr_ddevs; i++) {
- zoned_dev = &dmz->dev[i];
+ struct dmz_dev *zoned_dev = &dmz->dev[i];
+ struct block_device *bdev = zoned_dev->bdev;
+
if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
ti->error = "Secondary disk is not a zoned device";
return -EINVAL;
}
- q = bdev_get_queue(zoned_dev->bdev);
if (zone_nr_sectors &&
- zone_nr_sectors != blk_queue_zone_sectors(q)) {
+ zone_nr_sectors != bdev_zone_sectors(bdev)) {
ti->error = "Zone nr sectors mismatch";
return -EINVAL;
}
- zone_nr_sectors = blk_queue_zone_sectors(q);
+ zone_nr_sectors = bdev_zone_sectors(bdev);
zoned_dev->zone_nr_sectors = zone_nr_sectors;
- zoned_dev->nr_zones =
- blkdev_nr_zones(zoned_dev->bdev->bd_disk);
+ zoned_dev->nr_zones = bdev_nr_zones(bdev);
}
} else {
- reg_dev = NULL;
- zoned_dev = &dmz->dev[0];
+ struct dmz_dev *zoned_dev = &dmz->dev[0];
+ struct block_device *bdev = zoned_dev->bdev;
+
if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
ti->error = "Disk is not a zoned device";
return -EINVAL;
}
- q = bdev_get_queue(zoned_dev->bdev);
- zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q);
- zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk);
+ zoned_dev->zone_nr_sectors = bdev_zone_sectors(bdev);
+ zoned_dev->nr_zones = bdev_nr_zones(bdev);
}
if (reg_dev) {
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index a02744a0846c..265494d3f711 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -248,7 +248,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
unsigned int dev_idx, bool idle);
struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd,
- unsigned int chunk, int op);
+ unsigned int chunk, enum req_op op);
void dmz_put_chunk_mapping(struct dmz_metadata *zmd, struct dm_zone *zone);
struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
struct dm_zone *dzone);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d8f16183bf27..99642f69bfa7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -88,10 +88,6 @@ struct clone_info {
bool submit_as_polled:1;
};
-#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
-#define DM_IO_BIO_OFFSET \
- (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
-
static inline struct dm_target_io *clone_to_tio(struct bio *clone)
{
return container_of(clone, struct dm_target_io, clone);
@@ -415,7 +411,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
{
- struct dm_target *tgt;
+ struct dm_target *ti;
struct dm_table *map;
int r;
@@ -426,17 +422,17 @@ retry:
return r;
/* We only support devices that have a single target */
- if (dm_table_get_num_targets(map) != 1)
+ if (map->num_targets != 1)
return r;
- tgt = dm_table_get_target(map, 0);
- if (!tgt->type->prepare_ioctl)
+ ti = dm_table_get_target(map, 0);
+ if (!ti->type->prepare_ioctl)
return r;
if (dm_suspended_md(md))
return -EAGAIN;
- r = tgt->type->prepare_ioctl(tgt, bdev);
+ r = ti->type->prepare_ioctl(ti, bdev);
if (r == -ENOTCONN && !fatal_signal_pending(current)) {
dm_put_live_table(md, *srcu_idx);
msleep(10);
@@ -555,6 +551,10 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone)
unsigned long flags;
/* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */
spin_lock_irqsave(&io->lock, flags);
+ if (dm_io_flagged(io, DM_IO_ACCOUNTED)) {
+ spin_unlock_irqrestore(&io->lock, flags);
+ return;
+ }
dm_io_set_flag(io, DM_IO_ACCOUNTED);
spin_unlock_irqrestore(&io->lock, flags);
}
@@ -574,9 +574,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
struct bio *clone;
clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
- /* Set default bdev, but target must bio_set_dev() before issuing IO */
- clone->bi_bdev = md->disk->part0;
-
tio = clone_to_tio(clone);
tio->flags = 0;
dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
@@ -609,6 +606,7 @@ static void free_io(struct dm_io *io)
static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
{
+ struct mapped_device *md = ci->io->md;
struct dm_target_io *tio;
struct bio *clone;
@@ -618,14 +616,10 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
/* alloc_io() already initialized embedded clone */
clone = &tio->clone;
} else {
- struct mapped_device *md = ci->io->md;
-
clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
&md->mempools->bs);
if (!clone)
return NULL;
- /* Set default bdev, but target must bio_set_dev() before issuing IO */
- clone->bi_bdev = md->disk->part0;
/* REQ_DM_POLL_LIST shouldn't be inherited */
clone->bi_opf &= ~REQ_DM_POLL_LIST;
@@ -641,6 +635,11 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
tio->len_ptr = len;
tio->old_sector = 0;
+ /* Set default bdev, but target must bio_set_dev() before issuing IO */
+ clone->bi_bdev = md->disk->part0;
+ if (unlikely(ti->needs_bio_set_dev))
+ bio_set_dev(clone, md->disk->part0);
+
if (len) {
clone->bi_iter.bi_size = to_bytes(*len);
if (bio_integrity(clone))
@@ -711,18 +710,18 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
}
static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md,
- int *srcu_idx, struct bio *bio)
+ int *srcu_idx, blk_opf_t bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
return dm_get_live_table_fast(md);
else
return dm_get_live_table(md, srcu_idx);
}
static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx,
- struct bio *bio)
+ blk_opf_t bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
dm_put_live_table_fast(md);
else
dm_put_live_table(md, srcu_idx);
@@ -879,22 +878,63 @@ static int __noflush_suspending(struct mapped_device *md)
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
-static void dm_io_complete(struct dm_io *io)
+static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
{
- blk_status_t io_error;
struct mapped_device *md = io->md;
+
+ if (first_stage) {
+ struct dm_io *next = md->requeue_list;
+
+ md->requeue_list = io;
+ io->next = next;
+ } else {
+ bio_list_add_head(&md->deferred, io->orig_bio);
+ }
+}
+
+static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
+{
+ if (first_stage)
+ queue_work(md->wq, &md->requeue_work);
+ else
+ queue_work(md->wq, &md->work);
+}
+
+/*
+ * Return true if the dm_io's original bio is requeued.
+ * io->status is updated with an error if the requeue is disallowed.
+ */
+static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
+{
struct bio *bio = io->orig_bio;
+ bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
+ bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
+ (bio->bi_opf & REQ_POLLED));
+ struct mapped_device *md = io->md;
+ bool requeued = false;
- if (io->status == BLK_STS_DM_REQUEUE) {
+ if (handle_requeue || handle_polled_eagain) {
unsigned long flags;
+
+ if (bio->bi_opf & REQ_POLLED) {
+ /*
+ * Upper layer won't help us poll split bio
+ * (io->orig_bio may only reflect a subset of the
+ * pre-split original) so clear REQ_POLLED.
+ */
+ bio_clear_polled(bio);
+ }
+
/*
- * Target requested pushing back the I/O.
+ * Target requested pushing back the I/O or
+ * polled IO hit BLK_STS_AGAIN.
*/
spin_lock_irqsave(&md->deferred_lock, flags);
- if (__noflush_suspending(md) &&
- !WARN_ON_ONCE(dm_is_zone_write(md, bio))) {
- /* NOTE early return due to BLK_STS_DM_REQUEUE below */
- bio_list_add_head(&md->deferred, bio);
+ if ((__noflush_suspending(md) &&
+ !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
+ handle_polled_eagain || first_stage) {
+ dm_requeue_add_io(io, first_stage);
+ requeued = true;
} else {
/*
* noflush suspend was interrupted or this is
@@ -905,6 +945,23 @@ static void dm_io_complete(struct dm_io *io)
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
+ if (requeued)
+ dm_kick_requeue(md, first_stage);
+
+ return requeued;
+}
+
+static void __dm_io_complete(struct dm_io *io, bool first_stage)
+{
+ struct bio *bio = io->orig_bio;
+ struct mapped_device *md = io->md;
+ blk_status_t io_error;
+ bool requeued;
+
+ requeued = dm_handle_requeue(io, first_stage);
+ if (requeued && first_stage)
+ return;
+
io_error = io->status;
if (dm_io_flagged(io, DM_IO_ACCOUNTED))
dm_end_io_acct(io);
@@ -924,21 +981,9 @@ static void dm_io_complete(struct dm_io *io)
if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
- if (io_error == BLK_STS_DM_REQUEUE || io_error == BLK_STS_AGAIN) {
- if (bio->bi_opf & REQ_POLLED) {
- /*
- * Upper layer won't help us poll split bio (io->orig_bio
- * may only reflect a subset of the pre-split original)
- * so clear REQ_POLLED in case of requeue.
- */
- bio_clear_polled(bio);
- if (io_error == BLK_STS_AGAIN) {
- /* io_uring doesn't handle BLK_STS_AGAIN (yet) */
- queue_io(md, bio);
- }
- }
+ /* Return early if the original bio was requeued */
+ if (requeued)
return;
- }
if (bio_is_flush_with_data(bio)) {
/*
@@ -955,6 +1000,58 @@ static void dm_io_complete(struct dm_io *io)
}
}
+static void dm_wq_requeue_work(struct work_struct *work)
+{
+ struct mapped_device *md = container_of(work, struct mapped_device,
+ requeue_work);
+ unsigned long flags;
+ struct dm_io *io;
+
+ /* reuse deferred lock to simplify dm_handle_requeue */
+ spin_lock_irqsave(&md->deferred_lock, flags);
+ io = md->requeue_list;
+ md->requeue_list = NULL;
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
+
+ while (io) {
+ struct dm_io *next = io->next;
+
+ dm_io_rewind(io, &md->queue->bio_split);
+
+ io->next = NULL;
+ __dm_io_complete(io, false);
+ io = next;
+ }
+}
+
+/*
+ * Two-stage requeue:
+ *
+ * 1) io->orig_bio points to the real original bio, and only the part mapped
+ * to this io must be requeued, not the other parts of the original bio.
+ *
+ * 2) io->orig_bio points to the new cloned bio which matches the requeued dm_io.
+ */
+static void dm_io_complete(struct dm_io *io)
+{
+ bool first_requeue;
+
+ /*
+ * Only a dm_io that has been split needs the two-stage requeue; otherwise
+ * we may run into a long bio clone chain during suspend and trigger OOM.
+ *
+ * Also, a flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so it
+ * isn't handled via the first-stage requeue either.
+ */
+ if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
+ first_requeue = true;
+ else
+ first_requeue = false;
+
+ __dm_io_complete(io, first_requeue);
+}
+
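(Editor's note) The two-stage requeue code above references fields that are not visible in this file's hunks: md->requeue_work, md->requeue_list and io->next. They are presumably added to struct mapped_device and struct dm_io in dm-core.h; a minimal sketch of the assumed members, named after their usage above (illustrative only):

	/* struct mapped_device */
	struct work_struct requeue_work;	/* runs dm_wq_requeue_work() */
	struct dm_io *requeue_list;		/* head of first-stage requeued ios */

	/* struct dm_io */
	struct dm_io *next;			/* links ios on md->requeue_list */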
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
@@ -1026,7 +1123,7 @@ static void clone_endio(struct bio *bio)
}
if (static_branch_unlikely(&zoned_enabled) &&
- unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev))))
+ unlikely(bdev_is_zoned(bio->bi_bdev)))
dm_zone_endio(io, bio);
if (endio) {
@@ -1079,23 +1176,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector)
{
sector_t target_offset = dm_target_offset(ti, sector);
sector_t len = max_io_len_target_boundary(ti, target_offset);
- sector_t max_len;
/*
* Does the target need to split IO even further?
* - varied (per target) IO splitting is a tenet of DM; this
* explains why stacked chunk_sectors based splitting via
- * blk_max_size_offset() isn't possible here. So pass in
- * ti->max_io_len to override stacked chunk_sectors.
+ * blk_queue_split() isn't possible here.
*/
- if (ti->max_io_len) {
- max_len = blk_max_size_offset(ti->table->md->queue,
- target_offset, ti->max_io_len);
- if (len > max_len)
- len = max_len;
- }
-
- return len;
+ if (!ti->max_io_len)
+ return len;
+ return min_t(sector_t, len,
+ min(queue_max_sectors(ti->table->md->queue),
+ blk_chunk_sectors_left(target_offset, ti->max_io_len)));
}
int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
@@ -1238,6 +1330,7 @@ out:
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
struct dm_target_io *tio = clone_to_tio(bio);
+ struct dm_io *io = tio->io;
unsigned bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
@@ -1253,8 +1346,9 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
* __split_and_process_bio() may have already saved mapped part
* for accounting but it is being reduced so update accordingly.
*/
- dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
- tio->io->sectors = n_sectors;
+ dm_io_set_flag(io, DM_IO_WAS_SPLIT);
+ io->sectors = n_sectors;
+ io->sector_offset = bio_sectors(io->orig_bio);
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
@@ -1377,17 +1471,7 @@ static void setup_split_accounting(struct clone_info *ci, unsigned len)
*/
dm_io_set_flag(io, DM_IO_WAS_SPLIT);
io->sectors = len;
- }
-
- if (static_branch_unlikely(&stats_enabled) &&
- unlikely(dm_stats_used(&io->md->stats))) {
- /*
- * Save bi_sector in terms of its offset from end of
- * original bio, only needed for DM-stats' benefit.
- * - saved regardless of whether split needed so that
- * dm_accept_partial_bio() doesn't need to.
- */
- io->sector_offset = bio_end_sector(ci->bio) - ci->sector;
+ io->sector_offset = bio_sectors(ci->bio);
}
}
@@ -1421,11 +1505,11 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
- unsigned num_bios, unsigned *len)
+ unsigned int num_bios, unsigned *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
- int ret = 0;
+ unsigned int ret = 0;
switch (num_bios) {
case 0:
@@ -1453,8 +1537,7 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
static void __send_empty_flush(struct clone_info *ci)
{
- unsigned target_nr = 0;
- struct dm_target *ti;
+ struct dm_table *t = ci->map;
struct bio flush_bio;
/*
@@ -1469,8 +1552,9 @@ static void __send_empty_flush(struct clone_info *ci)
ci->sector_count = 0;
ci->io->tio.clone.bi_iter.bi_size = 0;
- while ((ti = dm_table_get_target(ci->map, target_nr++))) {
- int bios;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ unsigned int bios;
+ struct dm_target *ti = dm_table_get_target(t, i);
atomic_add(ti->num_flush_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
@@ -1490,7 +1574,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
unsigned num_bios)
{
unsigned len;
- int bios;
+ unsigned int bios;
len = min_t(sector_t, ci->sector_count,
max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
@@ -1509,7 +1593,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
static bool is_abnormal_io(struct bio *bio)
{
- unsigned int op = bio_op(bio);
+ enum req_op op = bio_op(bio);
if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
switch (op) {
@@ -1540,6 +1624,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
case REQ_OP_WRITE_ZEROES:
num_bios = ti->num_write_zeroes_bios;
break;
+ default:
+ break;
}
/*
@@ -1609,14 +1695,19 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ti = dm_table_find_target(ci->map, ci->sector);
if (unlikely(!ti))
return BLK_STS_IOERR;
- else if (unlikely(ci->is_abnormal_io))
+
+ if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
+ unlikely(!dm_target_supports_nowait(ti->type)))
+ return BLK_STS_NOTSUPP;
+
+ if (unlikely(ci->is_abnormal_io))
return __process_abnormal_io(ci, ti);
/*
* Only support bio polling for normal IO, and the target io is
* exactly inside the dm_io instance (verified in dm_poll_dm_io)
*/
- ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED;
+ ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
setup_split_accounting(ci, len);
@@ -1711,8 +1802,9 @@ static void dm_submit_bio(struct bio *bio)
struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
int srcu_idx;
struct dm_table *map;
+ blk_opf_t bio_opf = bio->bi_opf;
- map = dm_get_live_table_bio(md, &srcu_idx, bio);
+ map = dm_get_live_table_bio(md, &srcu_idx, bio_opf);
/* If suspended, or map not yet available, queue this IO for later */
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
@@ -1728,7 +1820,7 @@ static void dm_submit_bio(struct bio *bio)
dm_split_and_process_bio(md, map, bio);
out:
- dm_put_live_table_bio(md, srcu_idx, bio);
+ dm_put_live_table_bio(md, srcu_idx, bio_opf);
}
static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
@@ -1884,7 +1976,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
del_gendisk(md->disk);
}
dm_queue_destroy_crypto_profile(md->queue);
- blk_cleanup_disk(md->disk);
+ put_disk(md->disk);
}
if (md->pending_io) {
@@ -1959,9 +2051,11 @@ static struct mapped_device *alloc_dev(int minor)
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
+ INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
+ md->requeue_list = NULL;
md->swap_bios = get_swap_bios();
sema_init(&md->swap_bios_semaphore, md->swap_bios);
mutex_init(&md->swap_bios_lock);
@@ -2968,54 +3062,6 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned per_io_data_size, unsigned min_pool_size,
- bool integrity, bool poll)
-{
- struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
- unsigned int pool_size = 0;
- unsigned int front_pad, io_front_pad;
- int ret;
-
- if (!pools)
- return NULL;
-
- switch (type) {
- case DM_TYPE_BIO_BASED:
- case DM_TYPE_DAX_BIO_BASED:
- pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
- front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
- io_front_pad = roundup(per_io_data_size, __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
- ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, poll ? BIOSET_PERCPU_CACHE : 0);
- if (ret)
- goto out;
- if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))
- goto out;
- break;
- case DM_TYPE_REQUEST_BASED:
- pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
- front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
- /* per_io_data_size is used for blk-mq pdu at queue allocation */
- break;
- default:
- BUG();
- }
-
- ret = bioset_init(&pools->bs, pool_size, front_pad, 0);
- if (ret)
- goto out;
-
- if (integrity && bioset_integrity_create(&pools->bs, pool_size))
- goto out;
-
- return pools;
-
-out:
- dm_free_md_mempools(pools);
-
- return NULL;
-}
-
void dm_free_md_mempools(struct dm_md_mempools *pools)
{
if (!pools)
@@ -3031,11 +3077,14 @@ struct dm_pr {
u64 old_key;
u64 new_key;
u32 flags;
+ bool abort;
bool fail_early;
+ int ret;
+ enum pr_type type;
};
static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
- void *data)
+ struct dm_pr *pr)
{
struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_table *table;
@@ -3047,15 +3096,21 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
goto out;
/* We only support devices that have a single target */
- if (dm_table_get_num_targets(table) != 1)
+ if (table->num_targets != 1)
goto out;
ti = dm_table_get_target(table, 0);
+ if (dm_suspended_md(md)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
ret = -EINVAL;
if (!ti->type->iterate_devices)
goto out;
- ret = ti->type->iterate_devices(ti, fn, data);
+ ti->type->iterate_devices(ti, fn, pr);
+ ret = 0;
out:
dm_put_live_table(md, srcu_idx);
return ret;
@@ -3069,10 +3124,24 @@ static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
{
struct dm_pr *pr = data;
const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+ int ret;
+
+ if (!ops || !ops->pr_register) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
+ if (!ret)
+ return 0;
+
+ if (!pr->ret)
+ pr->ret = ret;
+
+ if (pr->fail_early)
+ return -1;
- if (!ops || !ops->pr_register)
- return -EOPNOTSUPP;
- return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
+ return 0;
}
static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
@@ -3083,82 +3152,145 @@ static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
.new_key = new_key,
.flags = flags,
.fail_early = true,
+ .ret = 0,
};
int ret;
ret = dm_call_pr(bdev, __dm_pr_register, &pr);
- if (ret && new_key) {
- /* unregister all paths if we failed to register any path */
- pr.old_key = new_key;
- pr.new_key = 0;
- pr.flags = 0;
- pr.fail_early = false;
- dm_call_pr(bdev, __dm_pr_register, &pr);
+ if (ret) {
+ /* Didn't even get to register a path */
+ return ret;
}
+ if (!pr.ret)
+ return 0;
+ ret = pr.ret;
+
+ if (!new_key)
+ return ret;
+
+ /* unregister all paths if we failed to register any path */
+ pr.old_key = new_key;
+ pr.new_key = 0;
+ pr.flags = 0;
+ pr.fail_early = false;
+ (void) dm_call_pr(bdev, __dm_pr_register, &pr);
return ret;
}
+
+static int __dm_pr_reserve(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_reserve) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags);
+ if (!pr->ret)
+ return -1;
+
+ return 0;
+}
+
static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
u32 flags)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .old_key = key,
+ .flags = flags,
+ .type = type,
+ .fail_early = false,
+ .ret = 0,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_reserve, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_reserve)
- r = ops->pr_reserve(bdev, key, type, flags);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
+}
+
+/*
+ * If there is a non-All Registrants type of reservation, the release must be
+ * sent down the holding path. For the cases where there is no reservation,
+ * or the path is not the holder, the device will also return success, so we
+ * must try each path to make sure we got the correct path.
+ */
+static int __dm_pr_release(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_release) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type);
+ if (pr->ret)
+ return -1;
+
+ return 0;
}
static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .old_key = key,
+ .type = type,
+ .fail_early = false,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_release, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_release)
- r = ops->pr_release(bdev, key, type);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
+}
+
+static int __dm_pr_preempt(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_preempt) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type,
+ pr->abort);
+ if (!pr->ret)
+ return -1;
+
+ return 0;
}
static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
enum pr_type type, bool abort)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .new_key = new_key,
+ .old_key = old_key,
+ .type = type,
+ .fail_early = false,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_preempt, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_preempt)
- r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
}
static int dm_pr_clear(struct block_device *bdev, u64 key)
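/*
 * Sketch of the iterate-and-collect convention the persistent-reservation
 * rework above moves to: dm_call_pr() walks the single target's devices and
 * each __dm_pr_*() callback records its outcome in struct dm_pr, returning -1
 * to stop the walk early. The userspace code below mirrors the reserve
 * semantics (success on one path ends the walk, a failure falls through to
 * the next path); struct path, pr_ctx and the error values are simplified
 * stand-ins, not the kernel types.
 */
#include <stdio.h>

struct path {
	const char *name;
	int (*pr_reserve)(const struct path *p, unsigned long key);
};

struct pr_ctx {
	unsigned long key;
	int ret;			/* result collected by the callback */
};

static int do_reserve(const struct path *p, struct pr_ctx *ctx)
{
	if (!p->pr_reserve) {
		ctx->ret = -95;		/* stand-in for -EOPNOTSUPP */
		return -1;		/* stop: this stack cannot do PR at all */
	}
	ctx->ret = p->pr_reserve(p, ctx->key);
	if (!ctx->ret)
		return -1;		/* reserved on this path, done */
	return 0;			/* failed here, let the next path try */
}

static void iterate_paths(const struct path *paths, int n,
			  int (*fn)(const struct path *, struct pr_ctx *),
			  struct pr_ctx *ctx)
{
	for (int i = 0; i < n; i++)
		if (fn(&paths[i], ctx) < 0)
			break;
}

static int reserve_fail(const struct path *p, unsigned long key)
{
	printf("reserve on %s (key %#lx) failed\n", p->name, key);
	return -5;			/* stand-in for -EIO */
}

static int reserve_ok(const struct path *p, unsigned long key)
{
	printf("reserved via %s, key %#lx\n", p->name, key);
	return 0;
}

int main(void)
{
	struct path paths[] = { { "pathA", reserve_fail }, { "pathB", reserve_ok } };
	struct pr_ctx ctx = { .key = 0x123, .ret = 0 };

	iterate_paths(paths, 2, do_reserve, &ctx);
	return ctx.ret;			/* caller reports the collected result */
}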
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index a8405ce305a9..5201df03ce40 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -53,7 +53,6 @@ struct dm_io;
*---------------------------------------------------------------*/
void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
-struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
bool dm_table_has_no_data_devices(struct dm_table *table);
int dm_calculate_queue_limits(struct dm_table *table,
@@ -218,9 +217,6 @@ void dm_kcopyd_exit(void);
/*
* Mempool operations
*/
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned per_io_data_size, unsigned min_pool_size,
- bool integrity, bool poll);
void dm_free_md_mempools(struct dm_md_mempools *pools);
/*
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index d87f674ab762..bf6dffadbe6f 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -165,7 +165,7 @@ static int read_sb_page(struct mddev *mddev, loff_t offset,
if (sync_page_io(rdev, target,
roundup(size, bdev_logical_block_size(rdev->bdev)),
- page, REQ_OP_READ, 0, true)) {
+ page, REQ_OP_READ, true)) {
page->index = index;
return 0;
}
@@ -302,7 +302,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
atomic_inc(&bitmap->pending_writes);
set_buffer_locked(bh);
set_buffer_mapped(bh);
- submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+ submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
bh = bh->b_this_page;
}
@@ -394,7 +394,7 @@ static int read_page(struct file *file, unsigned long index,
atomic_inc(&bitmap->pending_writes);
set_buffer_locked(bh);
set_buffer_mapped(bh);
- submit_bh(REQ_OP_READ, 0, bh);
+ submit_bh(REQ_OP_READ, bh);
}
blk_cur++;
bh = bh->b_this_page;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8273ac5eef06..4df78e30b76a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -993,15 +993,15 @@ int md_super_wait(struct mddev *mddev)
}
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
- struct page *page, int op, int op_flags, bool metadata_op)
+ struct page *page, blk_opf_t opf, bool metadata_op)
{
struct bio bio;
struct bio_vec bvec;
if (metadata_op && rdev->meta_bdev)
- bio_init(&bio, rdev->meta_bdev, &bvec, 1, op | op_flags);
+ bio_init(&bio, rdev->meta_bdev, &bvec, 1, opf);
else
- bio_init(&bio, rdev->bdev, &bvec, 1, op | op_flags);
+ bio_init(&bio, rdev->bdev, &bvec, 1, opf);
if (metadata_op)
bio.bi_iter.bi_sector = sector + rdev->sb_start;
@@ -1024,7 +1024,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
if (rdev->sb_loaded)
return 0;
- if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
+ if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true))
goto fail;
rdev->sb_loaded = 1;
return 0;
@@ -1722,7 +1722,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
return -EINVAL;
bb_sector = (long long)offset;
if (!sync_page_io(rdev, bb_sector, sectors << 9,
- rdev->bb_page, REQ_OP_READ, 0, true))
+ rdev->bb_page, REQ_OP_READ, true))
return -EIO;
bbp = (__le64 *)page_address(rdev->bb_page);
rdev->badblocks.shift = sb->bblog_shift;
@@ -2438,7 +2438,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
mdname(mddev), mddev->max_disks);
return -EBUSY;
}
- bdevname(rdev->bdev,b);
+ snprintf(b, sizeof(b), "%pg", rdev->bdev);
strreplace(b, '/', '!');
rdev->mddev = mddev;
@@ -4831,7 +4831,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
mddev_unlock(mddev);
}
@@ -5579,7 +5579,7 @@ static void md_free(struct kobject *ko)
if (mddev->gendisk) {
del_gendisk(mddev->gendisk);
- blk_cleanup_disk(mddev->gendisk);
+ put_disk(mddev->gendisk);
}
percpu_ref_exit(&mddev->writes_pending);
@@ -5718,7 +5718,7 @@ static int md_alloc(dev_t dev, char *name)
out_del_gendisk:
del_gendisk(disk);
out_cleanup_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_unlock_disks_mutex:
mutex_unlock(&disks_mutex);
mddev_put(mddev);
@@ -6197,7 +6197,7 @@ static void __md_stop_writes(struct mddev *mddev)
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
del_timer_sync(&mddev->safemode_timer);
@@ -9303,7 +9303,7 @@ void md_check_recovery(struct mddev *mddev)
* ->spare_active and clear saved_raid_disk
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
@@ -9338,7 +9338,7 @@ void md_check_recovery(struct mddev *mddev)
goto unlock;
}
if (mddev->sync_thread) {
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
goto unlock;
}
/* Set RUNNING before clearing NEEDED to avoid
@@ -9411,18 +9411,14 @@ void md_check_recovery(struct mddev *mddev)
}
EXPORT_SYMBOL(md_check_recovery);
-void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held)
+void md_reap_sync_thread(struct mddev *mddev)
{
struct md_rdev *rdev;
sector_t old_dev_sectors = mddev->dev_sectors;
bool is_reshaped = false;
- if (reconfig_mutex_held)
- mddev_unlock(mddev);
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
- if (reconfig_mutex_held)
- mddev_lock_nointr(mddev);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
mddev->degraded != mddev->raid_disks) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5f62c46ac2d3..b4f84b27bdef 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -719,7 +719,7 @@ extern struct md_thread *md_register_thread(
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
-extern void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held);
+extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
@@ -738,8 +738,7 @@ extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
- struct page *page, int op, int op_flags,
- bool metadata_op);
+ struct page *page, blk_opf_t opf, bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(void);
extern void md_allow_write(struct mddev *mddev);
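/*
 * The sync_page_io()/submit_bh()/bio_init() changes in this series all drop
 * the separate (op, op_flags) pair in favour of a single blk_opf_t word that
 * carries both. A small userspace analogue of that encoding follows; the
 * MY_* constants are made-up stand-ins, not the kernel's REQ_* definitions,
 * and in the kernel the equivalent split back into an operation is done with
 * REQ_OP_MASK, as bio_op() does.
 */
#include <stdio.h>

typedef unsigned int opf_t;

enum my_op { MY_OP_READ = 0, MY_OP_WRITE = 1 };
#define MY_OP_MASK	0xffu		/* low bits: the operation */
#define MY_SYNC		(1u << 8)	/* flag bits live above the mask */
#define MY_FUA		(1u << 9)

static void submit(opf_t opf)
{
	enum my_op op = opf & MY_OP_MASK;	/* split the op back out */

	printf("%s%s%s\n", op == MY_OP_WRITE ? "write" : "read",
	       (opf & MY_SYNC) ? " +sync" : "", (opf & MY_FUA) ? " +fua" : "");
}

int main(void)
{
	submit(MY_OP_READ);			/* old calls: (READ, 0) */
	submit(MY_OP_WRITE | MY_SYNC | MY_FUA);	/* old calls: (WRITE, SYNC | FUA) */
	return 0;
}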
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 258d4eb2d63c..05d8438cfec8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1220,8 +1220,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
struct raid1_info *mirror;
struct bio *read_bio;
struct bitmap *bitmap = mddev->bitmap;
- const int op = bio_op(bio);
- const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+ const enum req_op op = bio_op(bio);
+ const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
int rdisk;
bool r1bio_existed = !!r1_bio;
@@ -1240,7 +1240,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
rcu_read_lock();
rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
if (rdev)
- bdevname(rdev->bdev, b);
+ snprintf(b, sizeof(b), "%pg", rdev->bdev);
else
strcpy(b, "???");
rcu_read_unlock();
@@ -1988,9 +1988,9 @@ static void end_sync_write(struct bio *bio)
}
static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
- int sectors, struct page *page, int rw)
+ int sectors, struct page *page, int rw)
{
- if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false))
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
/* success */
return 1;
if (rw == WRITE) {
@@ -2057,7 +2057,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
rdev = conf->mirrors[d].rdev;
if (sync_page_io(rdev, sect, s<<9,
pages[idx],
- REQ_OP_READ, 0, false)) {
+ REQ_OP_READ, false)) {
success = 1;
break;
}
@@ -2305,7 +2305,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (sync_page_io(rdev, sect, s<<9,
- conf->tmppage, REQ_OP_READ, 0, false))
+ conf->tmppage, REQ_OP_READ, false))
success = 1;
rdev_dec_pending(rdev, mddev);
if (success)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d589f823feb1..26545950ca42 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1136,8 +1136,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
{
struct r10conf *conf = mddev->private;
struct bio *read_bio;
- const int op = bio_op(bio);
- const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+ const enum req_op op = bio_op(bio);
+ const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
struct md_rdev *rdev;
char b[BDEVNAME_SIZE];
@@ -1164,7 +1164,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
disk = r10_bio->devs[slot].devnum;
err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (err_rdev)
- bdevname(err_rdev->bdev, b);
+ snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
else {
strcpy(b, "???");
/* This never gets dereferenced */
@@ -1230,9 +1230,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
struct bio *bio, bool replacement,
int n_copy)
{
- const int op = bio_op(bio);
- const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
- const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
+ const enum req_op op = bio_op(bio);
+ const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
+ const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
unsigned long flags;
struct blk_plug_cb *cb;
struct raid1_plug_cb *plug = NULL;
@@ -2512,7 +2512,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
addr,
s << 9,
pages[idx],
- REQ_OP_READ, 0, false);
+ REQ_OP_READ, false);
if (ok) {
rdev = conf->mirrors[dw].rdev;
addr = r10_bio->devs[1].addr + sect;
@@ -2520,7 +2520,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
addr,
s << 9,
pages[idx],
- REQ_OP_WRITE, 0, false);
+ REQ_OP_WRITE, false);
if (!ok) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement,
@@ -2644,7 +2644,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
&& (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
return -1;
- if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false))
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
/* success */
return 1;
if (rw == WRITE) {
@@ -2726,7 +2726,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
sect,
s<<9,
conf->tmppage,
- REQ_OP_READ, 0, false);
+ REQ_OP_READ, false);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success)
@@ -5107,7 +5107,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
addr,
s << 9,
pages[idx],
- REQ_OP_READ, 0, false);
+ REQ_OP_READ, false);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 83c184eddbda..6f2dd73128b0 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1788,7 +1788,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
mb = page_address(page);
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
mb, PAGE_SIZE));
- if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE,
+ if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE |
REQ_SYNC | REQ_FUA, false)) {
__free_page(page);
return -EIO;
@@ -1898,7 +1898,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
sync_page_io(rdev, sh->sector, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_WRITE, 0,
+ sh->dev[disk_index].page, REQ_OP_WRITE,
false);
rdev_dec_pending(rdev, rdev->mddev);
rcu_read_lock();
@@ -1908,7 +1908,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
atomic_inc(&rrdev->nr_pending);
rcu_read_unlock();
sync_page_io(rrdev, sh->sector, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_WRITE, 0,
+ sh->dev[disk_index].page, REQ_OP_WRITE,
false);
rdev_dec_pending(rrdev, rrdev->mddev);
rcu_read_lock();
@@ -2394,7 +2394,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
PAGE_SIZE));
kunmap_atomic(addr);
sync_page_io(log->rdev, write_pos, PAGE_SIZE,
- dev->page, REQ_OP_WRITE, 0, false);
+ dev->page, REQ_OP_WRITE, false);
write_pos = r5l_ring_add(log, write_pos,
BLOCK_SECTORS);
offset += sizeof(__le32) +
@@ -2406,7 +2406,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
mb, PAGE_SIZE));
sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
- REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false);
+ REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false);
sh->log_start = ctx->pos;
list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
atomic_inc(&log->stripe_in_journal_count);
@@ -2971,7 +2971,7 @@ static int r5l_load_log(struct r5l_log *log)
if (!page)
return -ENOMEM;
- if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, 0, false)) {
+ if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, false)) {
ret = -EIO;
goto ioerr;
}
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 973e2e06f19c..98988cb26295 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -629,9 +629,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
if (bdev) {
struct bio *bio;
- bio = bio_alloc_bioset(bdev, 0, GFP_NOIO,
+ bio = bio_alloc_bioset(bdev, 0,
REQ_OP_WRITE | REQ_PREFLUSH,
- &ppl_conf->flush_bs);
+ GFP_NOIO, &ppl_conf->flush_bs);
bio->bi_private = io;
bio->bi_end_io = ppl_flush_endio;
@@ -897,7 +897,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
__func__, indent, "", rdev->bdev,
(unsigned long long)sector);
if (!sync_page_io(rdev, sector, block_size, page2,
- REQ_OP_READ, 0, false)) {
+ REQ_OP_READ, false)) {
md_error(mddev, rdev);
pr_debug("%s:%*s read failed!\n", __func__,
indent, "");
@@ -919,7 +919,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
(unsigned long long)(ppl_sector + i));
if (!sync_page_io(log->rdev,
ppl_sector - log->rdev->data_offset + i,
- block_size, page2, REQ_OP_READ, 0,
+ block_size, page2, REQ_OP_READ,
false)) {
pr_debug("%s:%*s read failed!\n", __func__,
indent, "");
@@ -946,7 +946,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
(unsigned long long)parity_sector,
parity_rdev->bdev);
if (!sync_page_io(parity_rdev, parity_sector, block_size,
- page1, REQ_OP_WRITE, 0, false)) {
+ page1, REQ_OP_WRITE, false)) {
pr_debug("%s:%*s parity write error!\n", __func__,
indent, "");
md_error(mddev, parity_rdev);
@@ -998,7 +998,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
int s = pp_size > PAGE_SIZE ? PAGE_SIZE : pp_size;
if (!sync_page_io(rdev, sector - rdev->data_offset,
- s, page, REQ_OP_READ, 0, false)) {
+ s, page, REQ_OP_READ, false)) {
md_error(mddev, rdev);
ret = -EIO;
goto out;
@@ -1062,7 +1062,7 @@ static int ppl_write_empty_header(struct ppl_log *log)
if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
- REQ_FUA, 0, false)) {
+ REQ_FUA, false)) {
md_error(rdev->mddev, rdev);
ret = -EIO;
}
@@ -1100,7 +1100,7 @@ static int ppl_load_distributed(struct ppl_log *log)
if (!sync_page_io(rdev,
rdev->ppl.sector - rdev->data_offset +
pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
- 0, false)) {
+ false)) {
md_error(mddev, rdev);
ret = -EIO;
/* if not able to read - don't recover any PPL */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5d09256d7f81..5cabdbbac48b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1082,7 +1082,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
should_defer = conf->batch_bio_dispatch && conf->group_cnt;
for (i = disks; i--; ) {
- int op, op_flags = 0;
+ enum req_op op;
+ blk_opf_t op_flags = 0;
int replace_only = 0;
struct bio *bi, *rbi;
struct md_rdev *rdev, *rrdev = NULL;
@@ -7304,7 +7305,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
conf->mddev = mddev;
- if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
+ ret = -ENOMEM;
+ conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!conf->stripe_hashtbl)
goto abort;
	/* We init hash_locks[0] separately so that it can be used
@@ -7933,7 +7936,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
int err = 0;
int number = rdev->raid_disk;
struct md_rdev __rcu **rdevp;
- struct disk_info *p = conf->disks + number;
+ struct disk_info *p;
struct md_rdev *tmp;
print_raid5_conf(conf);
@@ -7952,6 +7955,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
log_exit(conf);
return 0;
}
+ if (unlikely(number >= conf->pool_size))
+ return 0;
+ p = conf->disks + number;
if (rdev == rcu_access_pointer(p->rdev))
rdevp = &p->rdev;
else if (rdev == rcu_access_pointer(p->replacement))
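/*
 * Minimal sketch of the validate-before-index pattern the raid5_remove_disk()
 * hunk above adopts: the "conf->disks + number" pointer is only formed after
 * "number" has been checked against conf->pool_size, so a stale raid_disk
 * index can no longer address past the allocated array. The structures below
 * are simplified stand-ins, not the raid5 ones.
 */
#include <stdio.h>
#include <stddef.h>

struct disk_info { int in_use; };

struct conf {
	struct disk_info *disks;
	size_t pool_size;		/* entries actually allocated */
};

static struct disk_info *get_slot(struct conf *c, size_t number)
{
	if (number >= c->pool_size)
		return NULL;		/* stale or out-of-range index */
	return &c->disks[number];	/* safe to form the pointer now */
}

int main(void)
{
	struct disk_info disks[4] = { { 0 } };
	struct conf c = { .disks = disks, .pool_size = 4 };

	printf("%p %p\n", (void *)get_slot(&c, 2), (void *)get_slot(&c, 9));
	return 0;
}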
@@ -8062,6 +8068,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
*/
if (rdev->saved_raid_disk >= 0 &&
rdev->saved_raid_disk >= first &&
+ rdev->saved_raid_disk <= last &&
conf->disks[rdev->saved_raid_disk].rdev == NULL)
first = rdev->saved_raid_disk;