Diffstat (limited to 'drivers/md')
42 files changed, 1095 insertions, 691 deletions
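Most of the churn in the hunks below is mechanical: struct dm_io_request loses its separate bi_op/bi_op_flags fields in favour of a single blk_opf_t bi_opf, callers OR the REQ_OP_* value together with the REQ_* flags, and callees recover the op with `opf & REQ_OP_MASK` (see the sync_rw_sb(), dm-io and dm-bufio hunks). The following standalone userspace sketch illustrates that packing pattern; the constants are simplified stand-ins, not the kernel's real definitions, and this is not code from the patch.

/*
 * Standalone sketch (userspace, simplified constants): the request op
 * lives in the low bits of one word and the flags occupy the bits
 * above it, so a single blk_opf_t-style value can replace the old
 * separate "op" and "op_flags" arguments.
 */
#include <stdio.h>

typedef unsigned int opf_t;

enum {
	OP_BITS  = 8,
	OP_MASK  = (1 << OP_BITS) - 1,	/* plays the role of REQ_OP_MASK */

	OP_READ  = 0,			/* stand-in for REQ_OP_READ */
	OP_WRITE = 1,			/* stand-in for REQ_OP_WRITE */

	F_SYNC   = 1 << OP_BITS,	/* stand-in for REQ_SYNC */
	F_FUA    = 1 << (OP_BITS + 1),	/* stand-in for REQ_FUA */
};

static void submit(opf_t opf)
{
	/* Callees recover the op by masking, as the converted DM code does. */
	unsigned int op = opf & OP_MASK;

	printf("op=%u sync=%d fua=%d\n", op,
	       !!(opf & F_SYNC), !!(opf & F_FUA));
}

int main(void)
{
	/* Old style: submit(OP_WRITE, F_FUA | F_SYNC) took two arguments. */
	/* New style: one combined value, OR'd together by the caller.     */
	submit(OP_WRITE | F_FUA | F_SYNC);
	submit(OP_READ);
	return 0;
}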
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 0454b0885b01..84291e38dca8 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -5,7 +5,7 @@ dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \ - dm-rq.o + dm-rq.o dm-io-rewind.o dm-multipath-y += dm-path-selector.o dm-mpath.o dm-historical-service-time-y += dm-ps-historical-service-time.o dm-io-affinity-y += dm-ps-io-affinity.o @@ -83,6 +83,7 @@ obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o obj-$(CONFIG_DM_ZONED) += dm-zoned.o obj-$(CONFIG_DM_WRITECACHE) += dm-writecache.o +obj-$(CONFIG_SECURITY_LOADPIN_VERITY) += dm-verity-loadpin.o ifeq ($(CONFIG_DM_INIT),y) dm-mod-objs += dm-init.o diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3563d15dbaf2..ba3909bb6bea 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -414,8 +414,8 @@ static void uuid_io_unlock(struct closure *cl) up(&c->uuid_write_mutex); } -static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, - struct bkey *k, struct closure *parent) +static void uuid_io(struct cache_set *c, blk_opf_t opf, struct bkey *k, + struct closure *parent) { struct closure *cl = &c->uuid_write; struct uuid_entry *u; @@ -429,22 +429,22 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, for (i = 0; i < KEY_PTRS(k); i++) { struct bio *bio = bch_bbio_alloc(c); - bio->bi_opf = REQ_SYNC | REQ_META | op_flags; + bio->bi_opf = opf | REQ_SYNC | REQ_META; bio->bi_iter.bi_size = KEY_SIZE(k) << 9; bio->bi_end_io = uuid_endio; bio->bi_private = cl; - bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); bch_bio_map(bio, c->uuids); bch_submit_bbio(bio, c, k, i); - if (op != REQ_OP_WRITE) + if ((opf & REQ_OP_MASK) != REQ_OP_WRITE) break; } bch_extent_to_text(buf, sizeof(buf), k); - pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf); + pr_debug("%s UUIDs at %s\n", (opf & REQ_OP_MASK) == REQ_OP_WRITE ? 
+ "wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) @@ -463,7 +463,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) return "bad uuid pointer"; bkey_copy(&c->uuid_bucket, k); - uuid_io(c, REQ_OP_READ, 0, k, cl); + uuid_io(c, REQ_OP_READ, k, cl); if (j->version < BCACHE_JSET_VERSION_UUIDv1) { struct uuid_entry_v0 *u0 = (void *) c->uuids; @@ -511,7 +511,7 @@ static int __uuid_write(struct cache_set *c) size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS; SET_KEY_SIZE(&k.key, size); - uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); + uuid_io(c, REQ_OP_WRITE, &k.key, &cl); closure_sync(&cl); /* Only one bucket used for uuid write */ @@ -587,8 +587,7 @@ static void prio_endio(struct bio *bio) closure_put(&ca->prio); } -static void prio_io(struct cache *ca, uint64_t bucket, int op, - unsigned long op_flags) +static void prio_io(struct cache *ca, uint64_t bucket, blk_opf_t opf) { struct closure *cl = &ca->prio; struct bio *bio = bch_bbio_alloc(ca->set); @@ -601,7 +600,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, bio->bi_end_io = prio_endio; bio->bi_private = ca; - bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); + bio->bi_opf = opf | REQ_SYNC | REQ_META; bch_bio_map(bio, ca->disk_buckets); closure_bio_submit(ca->set, bio, &ca->prio); @@ -661,7 +660,7 @@ int bch_prio_write(struct cache *ca, bool wait) BUG_ON(bucket == -1); mutex_unlock(&ca->set->bucket_lock); - prio_io(ca, bucket, REQ_OP_WRITE, 0); + prio_io(ca, bucket, REQ_OP_WRITE); mutex_lock(&ca->set->bucket_lock); ca->prio_buckets[i] = bucket; @@ -705,7 +704,7 @@ static int prio_read(struct cache *ca, uint64_t bucket) ca->prio_last_buckets[bucket_nr] = bucket; bucket_nr++; - prio_io(ca, bucket, REQ_OP_READ, 0); + prio_io(ca, bucket, REQ_OP_READ); if (p->csum != bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) { @@ -884,7 +883,7 @@ static void bcache_device_free(struct bcache_device *d) if (disk) { ida_simple_remove(&bcache_device_idx, first_minor_to_idx(disk->first_minor)); - blk_cleanup_disk(disk); + put_disk(disk); } bioset_exit(&d->bio_split); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5ffa1dcf84cf..dc01ce33265b 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -577,13 +577,12 @@ static void dmio_complete(unsigned long error, void *context) b->end_io(b, unlikely(error != 0) ? 
BLK_STS_IOERR : 0); } -static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, +static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector, unsigned n_sectors, unsigned offset) { int r; struct dm_io_request io_req = { - .bi_op = rw, - .bi_op_flags = 0, + .bi_opf = op, .notify.fn = dmio_complete, .notify.context = b, .client = b->c->dm_io, @@ -616,7 +615,7 @@ static void bio_complete(struct bio *bio) b->end_io(b, status); } -static void use_bio(struct dm_buffer *b, int rw, sector_t sector, +static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector, unsigned n_sectors, unsigned offset) { struct bio *bio; @@ -630,10 +629,10 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector, bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN); if (!bio) { dmio: - use_dmio(b, rw, sector, n_sectors, offset); + use_dmio(b, op, sector, n_sectors, offset); return; } - bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw); + bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, op); bio->bi_iter.bi_sector = sector; bio->bi_end_io = bio_complete; bio->bi_private = b; @@ -669,7 +668,8 @@ static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block return sector; } -static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t)) +static void submit_io(struct dm_buffer *b, enum req_op op, + void (*end_io)(struct dm_buffer *, blk_status_t)) { unsigned n_sectors; sector_t sector; @@ -679,7 +679,7 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff sector = block_to_sector(b->c, b->block); - if (rw != REQ_OP_WRITE) { + if (op != REQ_OP_WRITE) { n_sectors = b->c->block_size >> SECTOR_SHIFT; offset = 0; } else { @@ -698,9 +698,9 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff } if (b->data_mode != DATA_MODE_VMALLOC) - use_bio(b, rw, sector, n_sectors, offset); + use_bio(b, op, sector, n_sectors, offset); else - use_dmio(b, rw, sector, n_sectors, offset); + use_dmio(b, op, sector, n_sectors, offset); } /*---------------------------------------------------------------- @@ -1341,8 +1341,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { - .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, @@ -1365,8 +1364,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count) { struct dm_io_request io_req = { - .bi_op = REQ_OP_DISCARD, - .bi_op_flags = REQ_SYNC, + .bi_opf = REQ_OP_DISCARD | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index 179ed5bf81a3..0905f2c1615e 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -131,7 +131,7 @@ void dm_cache_dump(struct dm_cache_metadata *cmd); * hints will be lost. * * The hints are indexed by the cblock, but many policies will not - * neccessarily have a fast way of accessing efficiently via cblock. So + * necessarily have a fast way of accessing efficiently via cblock. So * rather than querying the policy for each cblock, we let it walk its data * structures and fill in the hints in whatever order it wishes. 
*/ diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 28c5de8eca4a..54a8d5c9a44e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2775,7 +2775,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, /* * The discard block size in the on disk metadata is not - * neccessarily the same as we're currently using. So we have to + * necessarily the same as we're currently using. So we have to * be careful to only set the discarded attribute if we know it * covers a complete block of the new size. */ diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 54c0473a51dd..6c6bd24774f2 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -22,6 +22,8 @@ #define DM_RESERVED_MAX_IOS 1024 +struct dm_io; + struct dm_kobject_holder { struct kobject kobj; struct completion completion; @@ -91,6 +93,14 @@ struct mapped_device { spinlock_t deferred_lock; struct bio_list deferred; + /* + * requeue work context is needed for cloning one new bio + * to represent the dm_io to be requeued, since each + * dm_io may point to the original bio from FS. + */ + struct work_struct requeue_work; + struct dm_io *requeue_list; + void *interface_ptr; /* @@ -216,6 +226,13 @@ struct dm_table { #endif }; +static inline struct dm_target *dm_table_get_target(struct dm_table *t, + unsigned int index) +{ + BUG_ON(index >= t->num_targets); + return t->targets + index; +} + /* * One of these is allocated per clone bio. */ @@ -230,6 +247,9 @@ struct dm_target_io { sector_t old_sector; struct bio clone; }; +#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone)) +#define DM_IO_BIO_OFFSET \ + (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio)) /* * dm_target_io flags @@ -299,6 +319,8 @@ static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit) io->flags |= (1U << bit); } +void dm_io_rewind(struct dm_io *io, struct bio_set *bs); + static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) { return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 0221fa63f888..223e8e1a7a13 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -61,7 +61,8 @@ static inline bool __ebs_check_bs(unsigned int bs) * * copy blocks between bufio blocks and bio vector's (partial/overlapping) pages. */ -static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bvec_iter *iter) +static int __ebs_rw_bvec(struct ebs_c *ec, enum req_op op, struct bio_vec *bv, + struct bvec_iter *iter) { int r = 0; unsigned char *ba, *pa; @@ -81,7 +82,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv cur_len = min(dm_bufio_get_block_size(ec->bufio) - buf_off, bv_len); /* Avoid reading for writes in case bio vector's page overwrites block completely. */ - if (rw == READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio)) + if (op == REQ_OP_READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio)) ba = dm_bufio_read(ec->bufio, block, &b); else ba = dm_bufio_new(ec->bufio, block, &b); @@ -95,7 +96,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv } else { /* Copy data to/from bio to buffer if read/new was successful above. 
*/ ba += buf_off; - if (rw == READ) { + if (op == REQ_OP_READ) { memcpy(pa, ba, cur_len); flush_dcache_page(bv->bv_page); } else { @@ -117,14 +118,14 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv } /* READ/WRITE: iterate bio vector's copying between (partial) pages and bufio blocks. */ -static int __ebs_rw_bio(struct ebs_c *ec, int rw, struct bio *bio) +static int __ebs_rw_bio(struct ebs_c *ec, enum req_op op, struct bio *bio) { int r = 0, rr; struct bio_vec bv; struct bvec_iter iter; bio_for_each_bvec(bv, bio, iter) { - rr = __ebs_rw_bvec(ec, rw, &bv, &iter); + rr = __ebs_rw_bvec(ec, op, &bv, &iter); if (rr) r = rr; } @@ -205,10 +206,10 @@ static void __ebs_process_bios(struct work_struct *ws) bio_list_for_each(bio, &bios) { r = -EIO; if (bio_op(bio) == REQ_OP_READ) - r = __ebs_rw_bio(ec, READ, bio); + r = __ebs_rw_bio(ec, REQ_OP_READ, bio); else if (bio_op(bio) == REQ_OP_WRITE) { write = true; - r = __ebs_rw_bio(ec, WRITE, bio); + r = __ebs_rw_bio(ec, REQ_OP_WRITE, bio); } else if (bio_op(bio) == REQ_OP_DISCARD) { __ebs_forget_bio(ec, bio); r = __ebs_discard_bio(ec, bio); diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 1f6bf152b3c7..e92c1afc3677 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1400,7 +1400,7 @@ static void start_worker(struct era *era) static void stop_worker(struct era *era) { atomic_set(&era->suspended, 1); - flush_workqueue(era->wq); + drain_workqueue(era->wq); } /*---------------------------------------------------------------- @@ -1570,6 +1570,12 @@ static void era_postsuspend(struct dm_target *ti) } stop_worker(era); + + r = metadata_commit(era->md); + if (r) { + DMERR("%s: metadata_commit failed", __func__); + /* FIXME: fail mode */ + } } static int era_preresume(struct dm_target *ti) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index f2305eb758a2..89fa7a68c6c4 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -32,7 +32,7 @@ struct flakey_c { unsigned corrupt_bio_byte; unsigned corrupt_bio_rw; unsigned corrupt_bio_value; - unsigned corrupt_bio_flags; + blk_opf_t corrupt_bio_flags; }; enum feature_flag_bits { @@ -145,7 +145,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, /* * Only corrupt bios with these flags set. 
*/ - r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); + BUILD_BUG_ON(sizeof(fc->corrupt_bio_flags) != + sizeof(unsigned int)); + r = dm_read_arg(_args + 3, as, + (__force unsigned *)&fc->corrupt_bio_flags, + &ti->error); if (r) return r; argc--; diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index 1842d3a958ef..a1bd7cd52b1b 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -208,7 +208,7 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl if (!target_data_buf) goto error; - num_targets = dm_table_get_num_targets(table); + num_targets = table->num_targets; if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio)) goto error; @@ -237,9 +237,6 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl for (i = 0; i < num_targets; i++) { struct dm_target *ti = dm_table_get_target(table, i); - if (!ti) - goto error; - last_target_measured = 0; /* diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3d5a0ce123c9..c60f9b2ece2d 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -298,7 +298,7 @@ struct dm_integrity_io { struct work_struct work; struct dm_integrity_c *ic; - enum req_opf op; + enum req_op op; bool fua; struct dm_integrity_range range; @@ -551,14 +551,14 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) return 0; } -static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) +static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) { struct dm_io_request io_req; struct dm_io_region io_loc; + const enum req_op op = opf & REQ_OP_MASK; int r; - io_req.bi_op = op; - io_req.bi_op_flags = op_flags; + io_req.bi_opf = opf; io_req.mem.type = DM_IO_KMEM; io_req.mem.ptr.addr = ic->sb; io_req.notify.fn = NULL; @@ -1050,8 +1050,9 @@ static void complete_journal_io(unsigned long error, void *context) complete_journal_op(comp); } -static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, - unsigned sector, unsigned n_sectors, struct journal_completion *comp) +static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, + unsigned sector, unsigned n_sectors, + struct journal_completion *comp) { struct dm_io_request io_req; struct dm_io_region io_loc; @@ -1067,8 +1068,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); - io_req.bi_op = op; - io_req.bi_op_flags = op_flags; + io_req.bi_opf = opf; io_req.mem.type = DM_IO_PAGE_LIST; if (ic->journal_io) io_req.mem.ptr.pl = &ic->journal_io[pl_index]; @@ -1088,7 +1088,8 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, r = dm_io(&io_req, 1, &io_loc, NULL); if (unlikely(r)) { - dm_integrity_io_error(ic, op == REQ_OP_READ ? "reading journal" : "writing journal", r); + dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? 
+ "reading journal" : "writing journal", r); if (comp) { WARN_ONCE(1, "asynchronous dm_io failed: %d", r); complete_journal_io(-1UL, comp); @@ -1096,15 +1097,16 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, } } -static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section, - unsigned n_sections, struct journal_completion *comp) +static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf, + unsigned section, unsigned n_sections, + struct journal_completion *comp) { unsigned sector, n_sectors; sector = section * ic->journal_section_sectors; n_sectors = n_sections * ic->journal_section_sectors; - rw_journal_sectors(ic, op, op_flags, sector, n_sectors, comp); + rw_journal_sectors(ic, opf, sector, n_sectors, comp); } static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections) @@ -1129,7 +1131,7 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi for (i = 0; i < commit_sections; i++) rw_section_mac(ic, commit_start + i, true); } - rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start, + rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start, commit_sections, &io_comp); } else { unsigned to_end; @@ -1141,7 +1143,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); if (try_wait_for_completion(&crypt_comp_1.comp)) { - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, + commit_start, to_end, &io_comp); reinit_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); @@ -1152,17 +1155,17 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); wait_for_completion_io(&crypt_comp_1.comp); - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); wait_for_completion_io(&crypt_comp_2.comp); } } else { for (i = 0; i < to_end; i++) rw_section_mac(ic, commit_start + i, true); - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); for (i = 0; i < commit_sections - to_end; i++) rw_section_mac(ic, i, true); } - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp); } wait_for_completion_io(&io_comp.comp); @@ -1188,8 +1191,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); - io_req.bi_op = REQ_OP_WRITE; - io_req.bi_op_flags = 0; + io_req.bi_opf = REQ_OP_WRITE; io_req.mem.type = DM_IO_PAGE_LIST; io_req.mem.ptr.pl = &ic->journal[pl_index]; io_req.mem.offset = pl_offset; @@ -1516,8 +1518,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat if (!ic->meta_dev) flush_data = false; if (flush_data) { - fr.io_req.bi_op = REQ_OP_WRITE, - fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, fr.io_req.mem.type = 
DM_IO_KMEM, fr.io_req.mem.ptr.addr = NULL, fr.io_req.notify.fn = flush_notify, @@ -2626,7 +2627,7 @@ static void recalc_write_super(struct dm_integrity_c *ic) if (dm_integrity_failed(ic)) return; - r = sync_rw_sb(ic, REQ_OP_WRITE, 0); + r = sync_rw_sb(ic, REQ_OP_WRITE); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); } @@ -2706,8 +2707,7 @@ next_chunk: if (unlikely(dm_integrity_failed(ic))) goto err; - io_req.bi_op = REQ_OP_READ; - io_req.bi_op_flags = 0; + io_req.bi_opf = REQ_OP_READ; io_req.mem.type = DM_IO_VMA; io_req.mem.ptr.addr = ic->recalc_buffer; io_req.notify.fn = NULL; @@ -2800,7 +2800,7 @@ static void bitmap_block_work(struct work_struct *w) if (bio_list_empty(&waiting)) return; - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL); @@ -2846,7 +2846,7 @@ static void bitmap_flush_work(struct work_struct *work) block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR); block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR); - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); spin_lock_irq(&ic->endio_wait.lock); @@ -2918,7 +2918,7 @@ static void replay_journal(struct dm_integrity_c *ic) if (!ic->just_formatted) { DEBUG_print("reading journal\n"); - rw_journal(ic, REQ_OP_READ, 0, 0, ic->journal_sections, NULL); + rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL); if (ic->journal_io) DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal"); if (ic->journal_io) { @@ -3113,7 +3113,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti) /* set to 0 to test bitmap replay code */ init_journal(ic, 0, ic->journal_sections, 0); ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); #endif @@ -3136,23 +3136,23 @@ static void dm_integrity_resume(struct dm_target *ti) if (ic->provided_data_sectors > old_provided_data_sectors && ic->mode == 'B' && ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { - rw_journal_sectors(ic, REQ_OP_READ, 0, 0, + rw_journal_sectors(ic, REQ_OP_READ, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); block_bitmap_op(ic, ic->journal, old_provided_data_sectors, ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET); - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); } ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); } if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) { DEBUG_print("resume dirty_bitmap\n"); - rw_journal_sectors(ic, REQ_OP_READ, 0, 0, + rw_journal_sectors(ic, REQ_OP_READ, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); if (ic->mode == 'B') { if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && @@ -3171,7 +3171,7 @@ static void dm_integrity_resume(struct dm_target *ti) block_bitmap_op(ic, ic->recalc_bitmap, 0, 
ic->provided_data_sectors, BITMAP_OP_SET); block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET); - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); ic->sb->recalc_sector = cpu_to_le64(0); @@ -3187,7 +3187,7 @@ static void dm_integrity_resume(struct dm_target *ti) replay_journal(ic); ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); } - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); } else { @@ -3199,7 +3199,7 @@ static void dm_integrity_resume(struct dm_target *ti) if (ic->mode == 'B') { ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); @@ -3215,7 +3215,7 @@ static void dm_integrity_resume(struct dm_target *ti) block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); } - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); } } @@ -4256,7 +4256,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - r = sync_rw_sb(ic, REQ_OP_READ, 0); + r = sync_rw_sb(ic, REQ_OP_READ); if (r) { ti->error = "Error reading superblock"; goto bad; @@ -4500,7 +4500,7 @@ try_smaller_buffer: ti->error = "Error initializing journal"; goto bad; } - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (r) { ti->error = "Error initializing superblock"; goto bad; diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c new file mode 100644 index 000000000000..0db53ccb94ba --- /dev/null +++ b/drivers/md/dm-io-rewind.c @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2022 Red Hat, Inc. + */ + +#include <linux/bio.h> +#include <linux/blk-crypto.h> +#include <linux/blk-integrity.h> + +#include "dm-core.h" + +static inline bool dm_bvec_iter_rewind(const struct bio_vec *bv, + struct bvec_iter *iter, + unsigned int bytes) +{ + int idx; + + iter->bi_size += bytes; + if (bytes <= iter->bi_bvec_done) { + iter->bi_bvec_done -= bytes; + return true; + } + + bytes -= iter->bi_bvec_done; + idx = iter->bi_idx - 1; + + while (idx >= 0 && bytes && bytes > bv[idx].bv_len) { + bytes -= bv[idx].bv_len; + idx--; + } + + if (WARN_ONCE(idx < 0 && bytes, + "Attempted to rewind iter beyond bvec's boundaries\n")) { + iter->bi_size -= bytes; + iter->bi_bvec_done = 0; + iter->bi_idx = 0; + return false; + } + + iter->bi_idx = idx; + iter->bi_bvec_done = bv[idx].bv_len - bytes; + return true; +} + +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +/** + * dm_bio_integrity_rewind - Rewind integrity vector + * @bio: bio whose integrity vector to update + * @bytes_done: number of data bytes to rewind + * + * Description: This function calculates how many integrity bytes the + * number of completed data bytes correspond to and rewind the + * integrity vector accordingly. 
+ */ +static void dm_bio_integrity_rewind(struct bio *bio, unsigned int bytes_done) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); + unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); + + bip->bip_iter.bi_sector -= bio_integrity_intervals(bi, bytes_done >> 9); + dm_bvec_iter_rewind(bip->bip_vec, &bip->bip_iter, bytes); +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +static inline void dm_bio_integrity_rewind(struct bio *bio, + unsigned int bytes_done) +{ + return; +} + +#endif + +#if defined(CONFIG_BLK_INLINE_ENCRYPTION) + +/* Decrements @dun by @dec, treating @dun as a multi-limb integer. */ +static void dm_bio_crypt_dun_decrement(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + unsigned int dec) +{ + int i; + + for (i = 0; dec && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) { + u64 prev = dun[i]; + + dun[i] -= dec; + if (dun[i] > prev) + dec = 1; + else + dec = 0; + } +} + +static void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes) +{ + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + + dm_bio_crypt_dun_decrement(bc->bc_dun, + bytes >> bc->bc_key->data_unit_size_bits); +} + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes) +{ + return; +} + +#endif + +static inline void dm_bio_rewind_iter(const struct bio *bio, + struct bvec_iter *iter, unsigned int bytes) +{ + iter->bi_sector -= bytes >> 9; + + /* No advance means no rewind */ + if (bio_no_advance_iter(bio)) + iter->bi_size += bytes; + else + dm_bvec_iter_rewind(bio->bi_io_vec, iter, bytes); +} + +/** + * dm_bio_rewind - update ->bi_iter of @bio by rewinding @bytes. + * @bio: bio to rewind + * @bytes: how many bytes to rewind + * + * WARNING: + * Caller must ensure that @bio has a fixed end sector, to allow + * rewinding from end of bio and restoring its original position. + * Caller is also responsibile for restoring bio's size. + */ +static void dm_bio_rewind(struct bio *bio, unsigned bytes) +{ + if (bio_integrity(bio)) + dm_bio_integrity_rewind(bio, bytes); + + if (bio_has_crypt_ctx(bio)) + dm_bio_crypt_rewind(bio, bytes); + + dm_bio_rewind_iter(bio, &bio->bi_iter, bytes); +} + +void dm_io_rewind(struct dm_io *io, struct bio_set *bs) +{ + struct bio *orig = io->orig_bio; + struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig, + GFP_NOIO, bs); + /* + * dm_bio_rewind can restore to previous position since the + * end sector is fixed for original bio, but we still need + * to restore bio's size manually (using io->sectors). + */ + dm_bio_rewind(new_orig, ((io->sector_offset << 9) - + orig->bi_iter.bi_size)); + bio_trim(new_orig, 0, io->sectors); + + bio_chain(new_orig, orig); + /* + * __bi_remaining was increased (by dm_split_and_process_bio), + * so must drop the one added in bio_chain. + */ + atomic_dec(&orig->__bi_remaining); + io->orig_bio = new_orig; +} diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index e4b95eaeec8c..783564533459 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -293,7 +293,7 @@ static void km_dp_init(struct dpages *dp, void *data) /*----------------------------------------------------------------- * IO routines that accept a list of pages. 
*---------------------------------------------------------------*/ -static void do_region(int op, int op_flags, unsigned region, +static void do_region(const blk_opf_t opf, unsigned region, struct dm_io_region *where, struct dpages *dp, struct io *io) { @@ -306,6 +306,7 @@ static void do_region(int op, int op_flags, unsigned region, struct request_queue *q = bdev_get_queue(where->bdev); sector_t num_sectors; unsigned int special_cmd_max_sectors; + const enum req_op op = opf & REQ_OP_MASK; /* * Reject unsupported discard and write same requests. @@ -339,8 +340,8 @@ static void do_region(int op, int op_flags, unsigned region, (PAGE_SIZE >> SECTOR_SHIFT))); } - bio = bio_alloc_bioset(where->bdev, num_bvecs, op | op_flags, - GFP_NOIO, &io->client->bios); + bio = bio_alloc_bioset(where->bdev, num_bvecs, opf, GFP_NOIO, + &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio->bi_end_io = endio; store_io_and_region_in_bio(bio, io, region); @@ -368,7 +369,7 @@ static void do_region(int op, int op_flags, unsigned region, } while (remaining); } -static void dispatch_io(int op, int op_flags, unsigned int num_regions, +static void dispatch_io(blk_opf_t opf, unsigned int num_regions, struct dm_io_region *where, struct dpages *dp, struct io *io, int sync) { @@ -378,7 +379,7 @@ static void dispatch_io(int op, int op_flags, unsigned int num_regions, BUG_ON(num_regions > DM_IO_MAX_REGIONS); if (sync) - op_flags |= REQ_SYNC; + opf |= REQ_SYNC; /* * For multiple regions we need to be careful to rewind @@ -386,8 +387,8 @@ static void dispatch_io(int op, int op_flags, unsigned int num_regions, */ for (i = 0; i < num_regions; i++) { *dp = old_pages; - if (where[i].count || (op_flags & REQ_PREFLUSH)) - do_region(op, op_flags, i, where + i, dp, io); + if (where[i].count || (opf & REQ_PREFLUSH)) + do_region(opf, i, where + i, dp, io); } /* @@ -411,13 +412,13 @@ static void sync_io_complete(unsigned long error, void *context) } static int sync_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int op, int op_flags, - struct dpages *dp, unsigned long *error_bits) + struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, + unsigned long *error_bits) { struct io *io; struct sync_io sio; - if (num_regions > 1 && !op_is_write(op)) { + if (num_regions > 1 && !op_is_write(opf)) { WARN_ON(1); return -EIO; } @@ -434,7 +435,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(op, op_flags, num_regions, where, dp, io, 1); + dispatch_io(opf, num_regions, where, dp, io, 1); wait_for_completion_io(&sio.wait); @@ -445,12 +446,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, } static int async_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int op, int op_flags, + struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, io_notify_fn fn, void *context) { struct io *io; - if (num_regions > 1 && !op_is_write(op)) { + if (num_regions > 1 && !op_is_write(opf)) { WARN_ON(1); fn(1, context); return -EIO; @@ -466,7 +467,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(op, op_flags, num_regions, where, dp, io, 0); + dispatch_io(opf, num_regions, where, dp, io, 0); return 0; } @@ -489,7 
+490,7 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, case DM_IO_VMA: flush_kernel_vmap_range(io_req->mem.ptr.vma, size); - if (io_req->bi_op == REQ_OP_READ) { + if ((io_req->bi_opf & REQ_OP_MASK) == REQ_OP_READ) { dp->vma_invalidate_address = io_req->mem.ptr.vma; dp->vma_invalidate_size = size; } @@ -519,11 +520,10 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, - io_req->bi_op, io_req->bi_op_flags, &dp, - sync_error_bits); + io_req->bi_opf, &dp, sync_error_bits); - return async_io(io_req->client, num_regions, where, io_req->bi_op, - io_req->bi_op_flags, &dp, io_req->notify.fn, + return async_io(io_req->client, num_regions, where, + io_req->bi_opf, &dp, io_req->notify.fn, io_req->notify.context); } EXPORT_SYMBOL(dm_io); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 87310fceb0d8..98976aaa9db9 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -832,7 +832,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) { if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; - param->target_count = dm_table_get_num_targets(table); + param->target_count = table->num_targets; } param->flags |= DM_ACTIVE_PRESENT_FLAG; @@ -845,7 +845,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (table) { if (!(dm_table_get_mode(table) & FMODE_WRITE)) param->flags |= DM_READONLY_FLAG; - param->target_count = dm_table_get_num_targets(table); + param->target_count = table->num_targets; } dm_put_live_table(md, srcu_idx); } @@ -1248,7 +1248,7 @@ static void retrieve_status(struct dm_table *table, type = STATUSTYPE_INFO; /* Get all the target info */ - num_targets = dm_table_get_num_targets(table); + num_targets = table->num_targets; for (i = 0; i < num_targets; i++) { struct dm_target *ti = dm_table_get_target(table, i); size_t l; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 37b03ab7e5c9..4d3bbbea2e9a 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -219,7 +219,7 @@ static struct page_list *alloc_pl(gfp_t gfp) if (!pl) return NULL; - pl->page = alloc_page(gfp); + pl->page = alloc_page(gfp | __GFP_HIGHMEM); if (!pl->page) { kfree(pl); return NULL; @@ -350,9 +350,9 @@ struct kcopyd_job { unsigned long write_err; /* - * Either READ or WRITE + * REQ_OP_READ, REQ_OP_WRITE or REQ_OP_WRITE_ZEROES. */ - int rw; + enum req_op op; struct dm_io_region source; /* @@ -418,7 +418,8 @@ static struct kcopyd_job *pop_io_job(struct list_head *jobs, * constraint and sequential writes that are at the right position. 
*/ list_for_each_entry(job, jobs, list) { - if (job->rw == READ || !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { + if (job->op == REQ_OP_READ || + !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { list_del(&job->list); return job; } @@ -518,7 +519,7 @@ static void complete_io(unsigned long error, void *context) io_job_finish(kc->throttle); if (error) { - if (op_is_write(job->rw)) + if (op_is_write(job->op)) job->write_err |= error; else job->read_err = 1; @@ -530,11 +531,11 @@ static void complete_io(unsigned long error, void *context) } } - if (op_is_write(job->rw)) + if (op_is_write(job->op)) push(&kc->complete_jobs, job); else { - job->rw = WRITE; + job->op = REQ_OP_WRITE; push(&kc->io_jobs, job); } @@ -549,8 +550,7 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_op = job->rw, - .bi_op_flags = 0, + .bi_opf = job->op, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = 0, @@ -571,7 +571,7 @@ static int run_io_job(struct kcopyd_job *job) io_job_start(job->kc->throttle); - if (job->rw == READ) + if (job->op == REQ_OP_READ) r = dm_io(&io_req, 1, &job->source, NULL); else r = dm_io(&io_req, job->num_dests, job->dests, NULL); @@ -614,7 +614,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (op_is_write(job->rw)) + if (op_is_write(job->op)) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -817,7 +817,7 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, if (from) { job->source = *from; job->pages = NULL; - job->rw = READ; + job->op = REQ_OP_READ; } else { memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; @@ -826,10 +826,10 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, /* * Use WRITE ZEROES to optimize zeroing if all dests support it. */ - job->rw = REQ_OP_WRITE_ZEROES; + job->op = REQ_OP_WRITE_ZEROES; for (i = 0; i < job->num_dests; i++) if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) { - job->rw = WRITE; + job->op = REQ_OP_WRITE; break; } } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 06f328928a7f..cf10fa667797 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -291,10 +291,9 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis core->nr_regions = le64_to_cpu(disk->nr_regions); } -static int rw_header(struct log_c *lc, int op) +static int rw_header(struct log_c *lc, enum req_op op) { - lc->io_req.bi_op = op; - lc->io_req.bi_op_flags = 0; + lc->io_req.bi_opf = op; return dm_io(&lc->io_req, 1, &lc->header_location, NULL); } @@ -307,8 +306,7 @@ static int flush_header(struct log_c *lc) .count = 0, }; - lc->io_req.bi_op = REQ_OP_WRITE; - lc->io_req.bi_op_flags = REQ_PREFLUSH; + lc->io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; return dm_io(&lc->io_req, 1, &null_location, NULL); } @@ -415,8 +413,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, /* * Work out how many "unsigned long"s we need to hold the bitset. 
*/ - bitset_size = dm_round_up(region_count, - sizeof(*lc->clean_bits) << BYTE_SHIFT); + bitset_size = dm_round_up(region_count, BITS_PER_LONG); bitset_size >>= BYTE_SHIFT; lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits); @@ -616,7 +613,7 @@ static int disk_resume(struct dm_dirty_log *log) log_clear_bit(lc, lc->clean_bits, i); /* clear any old bits -- device has shrunk */ - for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) + for (i = lc->region_count; i % BITS_PER_LONG; i++) log_clear_bit(lc, lc->clean_bits, i); /* copy clean across to sync */ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5e41fbae3f6b..1ec17c32867f 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1001,12 +1001,13 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) static int validate_raid_redundancy(struct raid_set *rs) { unsigned int i, rebuild_cnt = 0; - unsigned int rebuilds_per_group = 0, copies; + unsigned int rebuilds_per_group = 0, copies, raid_disks; unsigned int group_size, last_group_start; - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || - !rs->dev[i].rdev.sb_page) + for (i = 0; i < rs->raid_disks; i++) + if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) && + ((!test_bit(In_sync, &rs->dev[i].rdev.flags) || + !rs->dev[i].rdev.sb_page))) rebuild_cnt++; switch (rs->md.level) { @@ -1046,8 +1047,9 @@ static int validate_raid_redundancy(struct raid_set *rs) * A A B B C * C D D E E */ + raid_disks = min(rs->raid_disks, rs->md.raid_disks); if (__is_raid10_near(rs->md.new_layout)) { - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1070,10 +1072,10 @@ static int validate_raid_redundancy(struct raid_set *rs) * results in the need to treat the last (potentially larger) * set differently. 
*/ - group_size = (rs->md.raid_disks / copies); - last_group_start = (rs->md.raid_disks / group_size) - 1; + group_size = (raid_disks / copies); + last_group_start = (raid_disks / group_size) - 1; last_group_start *= group_size; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies) && !(i > last_group_start)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1367,7 +1369,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, } rs->md.bitmap_info.daemon_sleep = value; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) { - /* Userspace passes new data_offset after having extended the the data image LV */ + /* Userspace passes new data_offset after having extended the data image LV */ if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) { rs->ti->error = "Only one data_offset argument pair allowed"; return -EINVAL; @@ -1588,7 +1590,7 @@ static sector_t __rdev_sectors(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { struct md_rdev *rdev = &rs->dev[i].rdev; if (!test_bit(Journal, &rdev->flags) && @@ -2036,7 +2038,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload) rdev->sb_loaded = 0; - if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) { + if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true)) { DMERR("Failed to read superblock of device at position %d", rdev->raid_disk); md_error(rdev->mddev, rdev); @@ -3095,6 +3097,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; ti->num_flush_bios = 1; + ti->needs_bio_set_dev = true; /* Restore any requested new layout for conversion decision */ rs_config_restore(rs, &rs_layout); @@ -3507,7 +3510,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; - struct r5conf *conf = mddev->private; + struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL; int i, max_nr_stripes = conf ? 
conf->max_nr_stripes : 0; unsigned long recovery; unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */ @@ -3725,7 +3728,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_reap_sync_thread(mddev, false); + md_reap_sync_thread(mddev); } } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle) return -EBUSY; @@ -3766,13 +3769,13 @@ static int raid_iterate_devices(struct dm_target *ti, unsigned int i; int r = 0; - for (i = 0; !r && i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev) - r = fn(ti, - rs->dev[i].data_dev, - 0, /* No offset on data devs */ - rs->md.dev_sectors, - data); + for (i = 0; !r && i < rs->raid_disks; i++) { + if (rs->dev[i].data_dev) { + r = fn(ti, rs->dev[i].data_dev, + 0, /* No offset on data devs */ + rs->md.dev_sectors, data); + } + } return r; } @@ -3817,7 +3820,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); - for (i = 0; i < mddev->raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { r = &rs->dev[i].rdev; /* HM FIXME: enhance journal device recovery processing */ if (test_bit(Journal, &r->flags)) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 8811d484fdd1..06a38dc32025 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -260,8 +260,7 @@ static int mirror_flush(struct dm_target *ti) struct dm_io_region io[MAX_NR_MIRRORS]; struct mirror *m; struct dm_io_request io_req = { - .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, @@ -535,8 +534,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio) { struct dm_io_region io; struct dm_io_request io_req = { - .bi_op = REQ_OP_READ, - .bi_op_flags = 0, + .bi_opf = REQ_OP_READ, .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = read_callback, @@ -648,9 +646,9 @@ static void do_write(struct mirror_set *ms, struct bio *bio) unsigned int i; struct dm_io_region io[MAX_NR_MIRRORS], *dest = io; struct mirror *m; + blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH); struct dm_io_request io_req = { - .bi_op = REQ_OP_WRITE, - .bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH), + .bi_opf = REQ_OP_WRITE | op_flags, .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = write_callback, @@ -659,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) }; if (bio_op(bio) == REQ_OP_DISCARD) { - io_req.bi_op = REQ_OP_DISCARD; + io_req.bi_opf = REQ_OP_DISCARD | op_flags; io_req.mem.type = DM_IO_KMEM; io_req.mem.ptr.addr = NULL; } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index a83b98a8d2a9..4f49bbcce4f1 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -43,7 +43,6 @@ unsigned dm_get_reserved_rq_based_ios(void) return __dm_get_module_param(&reserved_rq_based_ios, RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS); } -EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); static unsigned dm_get_blk_mq_nr_hw_queues(void) { diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 3bb5cff5d6fc..f46f930eedf9 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -226,8 +226,8 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized 
block of data from a device. */ -static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op, - int op_flags, int metadata) +static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf, + int metadata) { struct dm_io_region where = { .bdev = dm_snap_cow(ps->store->snap)->bdev, @@ -235,8 +235,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op, .count = ps->store->chunk_size, }; struct dm_io_request io_req = { - .bi_op = op, - .bi_op_flags = op_flags, + .bi_opf = opf, .mem.type = DM_IO_VMA, .mem.ptr.vma = area, .client = ps->io_client, @@ -282,11 +281,11 @@ static void skip_metadata(struct pstore *ps) * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. */ -static int area_io(struct pstore *ps, int op, int op_flags) +static int area_io(struct pstore *ps, blk_opf_t opf) { chunk_t chunk = area_location(ps, ps->current_area); - return chunk_io(ps, ps->area, chunk, op, op_flags, 0); + return chunk_io(ps, ps->area, chunk, opf, 0); } static void zero_memory_area(struct pstore *ps) @@ -297,7 +296,7 @@ static void zero_memory_area(struct pstore *ps) static int zero_disk_area(struct pstore *ps, chunk_t area) { return chunk_io(ps, ps->zero_area, area_location(ps, area), - REQ_OP_WRITE, 0, 0); + REQ_OP_WRITE, 0); } static int read_header(struct pstore *ps, int *new_snapshot) @@ -329,7 +328,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1); + r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 1); if (r) goto bad; @@ -390,7 +389,7 @@ static int write_header(struct pstore *ps) dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->store->chunk_size); - return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1); + return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 1); } /* @@ -734,8 +733,8 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, REQ_OP_WRITE, - REQ_PREFLUSH | REQ_FUA | REQ_SYNC)) + if (ps->valid && area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | + REQ_SYNC)) ps->valid = 0; /* @@ -775,7 +774,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, return 0; ps->current_area--; - r = area_io(ps, REQ_OP_READ, 0); + r = area_io(ps, REQ_OP_READ); if (r < 0) return r; ps->current_committed = ps->exceptions_per_area; @@ -812,7 +811,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA); + r = area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA); if (r < 0) return r; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 0d336b5ec571..d1c2f84d27e3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2026,7 +2026,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) /* * Write to snapshot - higher level takes care of RW/RO * flags so we should only get this if we are - * writeable. + * writable. 
*/ if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index bd539afbfe88..332f96b58252 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -6,6 +6,7 @@ */ #include "dm-core.h" +#include "dm-rq.h" #include <linux/module.h> #include <linux/vmalloc.h> @@ -174,8 +175,6 @@ static void dm_table_destroy_crypto_profile(struct dm_table *t); void dm_table_destroy(struct dm_table *t) { - unsigned int i; - if (!t) return; @@ -184,13 +183,13 @@ void dm_table_destroy(struct dm_table *t) kvfree(t->index[t->depth - 2]); /* free the targets */ - for (i = 0; i < t->num_targets; i++) { - struct dm_target *tgt = t->targets + i; + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); - if (tgt->type->dtr) - tgt->type->dtr(tgt); + if (ti->type->dtr) + ti->type->dtr(ti); - dm_put_target_type(tgt->type); + dm_put_target_type(ti->type); } kvfree(t->highs); @@ -450,14 +449,14 @@ EXPORT_SYMBOL(dm_put_device); /* * Checks to see if the target joins onto the end of the table. */ -static int adjoin(struct dm_table *table, struct dm_target *ti) +static int adjoin(struct dm_table *t, struct dm_target *ti) { struct dm_target *prev; - if (!table->num_targets) + if (!t->num_targets) return !ti->begin; - prev = &table->targets[table->num_targets - 1]; + prev = &t->targets[t->num_targets - 1]; return (ti->begin == (prev->begin + prev->len)); } @@ -564,8 +563,8 @@ int dm_split_args(int *argc, char ***argvp, char *input) * two or more targets, the size of each piece it gets split into must * be compatible with the logical_block_size of the target processing it. */ -static int validate_hardware_logical_block_alignment(struct dm_table *table, - struct queue_limits *limits) +static int validate_hardware_logical_block_alignment(struct dm_table *t, + struct queue_limits *limits) { /* * This function uses arithmetic modulo the logical_block_size @@ -587,13 +586,13 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table, struct dm_target *ti; struct queue_limits ti_limits; - unsigned i; + unsigned int i; /* * Check each entry in the table in turn. 
*/ - for (i = 0; i < dm_table_get_num_targets(table); i++) { - ti = dm_table_get_target(table, i); + for (i = 0; i < t->num_targets; i++) { + ti = dm_table_get_target(t, i); blk_set_stacking_limits(&ti_limits); @@ -621,7 +620,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table, if (remaining) { DMWARN("%s: table line %u (start sect %llu len %llu) " "not aligned to h/w logical block size %u", - dm_device_name(table->md), i, + dm_device_name(t->md), i, (unsigned long long) ti->begin, (unsigned long long) ti->len, limits->logical_block_size); @@ -636,7 +635,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, { int r = -EINVAL, argc; char **argv; - struct dm_target *tgt; + struct dm_target *ti; if (t->singleton) { DMERR("%s: target type %s must appear alone in table", @@ -646,87 +645,87 @@ int dm_table_add_target(struct dm_table *t, const char *type, BUG_ON(t->num_targets >= t->num_allocated); - tgt = t->targets + t->num_targets; - memset(tgt, 0, sizeof(*tgt)); + ti = t->targets + t->num_targets; + memset(ti, 0, sizeof(*ti)); if (!len) { DMERR("%s: zero-length target", dm_device_name(t->md)); return -EINVAL; } - tgt->type = dm_get_target_type(type); - if (!tgt->type) { + ti->type = dm_get_target_type(type); + if (!ti->type) { DMERR("%s: %s: unknown target type", dm_device_name(t->md), type); return -EINVAL; } - if (dm_target_needs_singleton(tgt->type)) { + if (dm_target_needs_singleton(ti->type)) { if (t->num_targets) { - tgt->error = "singleton target type must appear alone in table"; + ti->error = "singleton target type must appear alone in table"; goto bad; } t->singleton = true; } - if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) { - tgt->error = "target type may not be included in a read-only table"; + if (dm_target_always_writeable(ti->type) && !(t->mode & FMODE_WRITE)) { + ti->error = "target type may not be included in a read-only table"; goto bad; } if (t->immutable_target_type) { - if (t->immutable_target_type != tgt->type) { - tgt->error = "immutable target type cannot be mixed with other target types"; + if (t->immutable_target_type != ti->type) { + ti->error = "immutable target type cannot be mixed with other target types"; goto bad; } - } else if (dm_target_is_immutable(tgt->type)) { + } else if (dm_target_is_immutable(ti->type)) { if (t->num_targets) { - tgt->error = "immutable target type cannot be mixed with other target types"; + ti->error = "immutable target type cannot be mixed with other target types"; goto bad; } - t->immutable_target_type = tgt->type; + t->immutable_target_type = ti->type; } - if (dm_target_has_integrity(tgt->type)) + if (dm_target_has_integrity(ti->type)) t->integrity_added = 1; - tgt->table = t; - tgt->begin = start; - tgt->len = len; - tgt->error = "Unknown error"; + ti->table = t; + ti->begin = start; + ti->len = len; + ti->error = "Unknown error"; /* * Does this target adjoin the previous one ? 
*/ - if (!adjoin(t, tgt)) { - tgt->error = "Gap in table"; + if (!adjoin(t, ti)) { + ti->error = "Gap in table"; goto bad; } r = dm_split_args(&argc, &argv, params); if (r) { - tgt->error = "couldn't split parameters"; + ti->error = "couldn't split parameters"; goto bad; } - r = tgt->type->ctr(tgt, argc, argv); + r = ti->type->ctr(ti, argc, argv); kfree(argv); if (r) goto bad; - t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; + t->highs[t->num_targets++] = ti->begin + ti->len - 1; - if (!tgt->num_discard_bios && tgt->discards_supported) + if (!ti->num_discard_bios && ti->discards_supported) DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.", dm_device_name(t->md), type); - if (tgt->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key)) + if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key)) static_branch_enable(&swap_bios_enabled); return 0; bad: - DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, tgt->error, ERR_PTR(r)); - dm_put_target_type(tgt->type); + DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r)); + dm_put_target_type(ti->type); return r; } @@ -825,14 +824,11 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de } static bool dm_table_supports_dax(struct dm_table *t, - iterate_devices_callout_fn iterate_fn) + iterate_devices_callout_fn iterate_fn) { - struct dm_target *ti; - unsigned i; - /* Ensure that all targets support DAX. */ - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->type->direct_access) return false; @@ -860,9 +856,8 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, static int dm_table_determine_type(struct dm_table *t) { - unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - struct dm_target *tgt; + struct dm_target *ti; struct list_head *devices = dm_table_get_devices(t); enum dm_queue_mode live_md_type = dm_get_md_type(t->md); @@ -876,11 +871,11 @@ static int dm_table_determine_type(struct dm_table *t) goto verify_rq_based; } - for (i = 0; i < t->num_targets; i++) { - tgt = t->targets + i; - if (dm_target_hybrid(tgt)) + for (unsigned int i = 0; i < t->num_targets; i++) { + ti = dm_table_get_target(t, i); + if (dm_target_hybrid(ti)) hybrid = 1; - else if (dm_target_request_based(tgt)) + else if (dm_target_request_based(ti)) request_based = 1; else bio_based = 1; @@ -942,18 +937,18 @@ verify_rq_based: return 0; } - tgt = dm_table_get_immutable_target(t); - if (!tgt) { + ti = dm_table_get_immutable_target(t); + if (!ti) { DMERR("table load rejected: immutable target is required"); return -EINVAL; - } else if (tgt->max_io_len) { + } else if (ti->max_io_len) { DMERR("table load rejected: immutable target that splits IO is not supported"); return -EINVAL; } /* Non-request-stackable devices can't be used for request-based dm */ - if (!tgt->type->iterate_devices || - !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) { + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) { DMERR("table load rejected: including non-request-stackable devices"); return -EINVAL; } @@ -983,11 +978,9 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t) struct dm_target *dm_table_get_wildcard_target(struct dm_table *t) { - struct dm_target *ti; - unsigned i; + for (unsigned int i = 0; i < 
t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); if (dm_target_is_wildcard(ti->type)) return ti; } @@ -1010,32 +1003,56 @@ static bool dm_table_supports_poll(struct dm_table *t); static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) { enum dm_queue_mode type = dm_table_get_type(t); - unsigned per_io_data_size = 0; - unsigned min_pool_size = 0; - struct dm_target *ti; - unsigned i; - bool poll_supported = false; + unsigned int per_io_data_size = 0, front_pad, io_front_pad; + unsigned int min_pool_size = 0, pool_size; + struct dm_md_mempools *pools; if (unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - if (__table_type_bio_based(type)) { - for (i = 0; i < t->num_targets; i++) { - ti = t->targets + i; - per_io_data_size = max(per_io_data_size, ti->per_io_data_size); - min_pool_size = max(min_pool_size, ti->num_flush_bios); - } - poll_supported = dm_table_supports_poll(t); + pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id); + if (!pools) + return -ENOMEM; + + if (type == DM_TYPE_REQUEST_BASED) { + pool_size = dm_get_reserved_rq_based_ios(); + front_pad = offsetof(struct dm_rq_clone_bio_info, clone); + goto init_bs; } - t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size, - t->integrity_supported, poll_supported); - if (!t->mempools) - return -ENOMEM; + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + per_io_data_size = max(per_io_data_size, ti->per_io_data_size); + min_pool_size = max(min_pool_size, ti->num_flush_bios); + } + pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); + front_pad = roundup(per_io_data_size, + __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET; + + io_front_pad = roundup(per_io_data_size, + __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET; + if (bioset_init(&pools->io_bs, pool_size, io_front_pad, + dm_table_supports_poll(t) ? 
BIOSET_PERCPU_CACHE : 0)) + goto out_free_pools; + if (t->integrity_supported && + bioset_integrity_create(&pools->io_bs, pool_size)) + goto out_free_pools; +init_bs: + if (bioset_init(&pools->bs, pool_size, front_pad, 0)) + goto out_free_pools; + if (t->integrity_supported && + bioset_integrity_create(&pools->bs, pool_size)) + goto out_free_pools; + + t->mempools = pools; return 0; + +out_free_pools: + dm_free_md_mempools(pools); + return -ENOMEM; } static int setup_indexes(struct dm_table *t) @@ -1100,10 +1117,10 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t) struct list_head *devices = dm_table_get_devices(t); struct dm_dev_internal *dd = NULL; struct gendisk *prev_disk = NULL, *template_disk = NULL; - unsigned i; - for (i = 0; i < dm_table_get_num_targets(t); i++) { + for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); + if (!dm_target_passes_integrity(ti->type)) goto no_integrity; } @@ -1217,18 +1234,19 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, struct dm_keyslot_evict_args args = { key }; struct dm_table *t; int srcu_idx; - int i; - struct dm_target *ti; t = dm_get_live_table(md, &srcu_idx); if (!t) return 0; - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + if (!ti->type->iterate_devices) continue; ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args); } + dm_put_live_table(md, srcu_idx); return args.err; } @@ -1277,7 +1295,6 @@ static int dm_table_construct_crypto_profile(struct dm_table *t) { struct dm_crypto_profile *dmcp; struct blk_crypto_profile *profile; - struct dm_target *ti; unsigned int i; bool empty_profile = true; @@ -1293,8 +1310,8 @@ static int dm_table_construct_crypto_profile(struct dm_table *t) memset(profile->modes_supported, 0xFF, sizeof(profile->modes_supported)); - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!dm_target_passes_crypto(ti->type)) { blk_crypto_intersect_capabilities(profile, NULL); @@ -1444,14 +1461,6 @@ inline sector_t dm_table_get_size(struct dm_table *t) } EXPORT_SYMBOL(dm_table_get_size); -struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) -{ - if (index >= t->num_targets) - return NULL; - - return t->targets + index; -} - /* * Search the btree for the correct target. 
* @@ -1512,11 +1521,8 @@ static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev, static bool dm_table_any_dev_attr(struct dm_table *t, iterate_devices_callout_fn func, void *data) { - struct dm_target *ti; - unsigned int i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (ti->type->iterate_devices && ti->type->iterate_devices(ti, func, data)) @@ -1538,11 +1544,8 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev, static bool dm_table_supports_poll(struct dm_table *t) { - struct dm_target *ti; - unsigned i = 0; - - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->type->iterate_devices || ti->type->iterate_devices(ti, device_not_poll_capable, NULL)) @@ -1558,18 +1561,15 @@ static bool dm_table_supports_poll(struct dm_table *t) * Returns false if the result is unknown because a target doesn't * support iterate_devices. */ -bool dm_table_has_no_data_devices(struct dm_table *table) +bool dm_table_has_no_data_devices(struct dm_table *t) { - struct dm_target *ti; - unsigned i, num_devices; - - for (i = 0; i < dm_table_get_num_targets(table); i++) { - ti = dm_table_get_target(table, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + unsigned num_devices = 0; if (!ti->type->iterate_devices) return false; - num_devices = 0; ti->type->iterate_devices(ti, count_device, &num_devices); if (num_devices) return false; @@ -1597,11 +1597,8 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev, static bool dm_table_supports_zoned_model(struct dm_table *t, enum blk_zoned_model zoned_model) { - struct dm_target *ti; - unsigned i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (dm_target_supports_zoned_hm(ti->type)) { if (!ti->type->iterate_devices || @@ -1620,13 +1617,11 @@ static bool dm_table_supports_zoned_model(struct dm_table *t, static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); unsigned int *zone_sectors = data; - if (!blk_queue_is_zoned(q)) + if (!bdev_is_zoned(dev->bdev)) return 0; - - return blk_queue_zone_sectors(q) != *zone_sectors; + return bdev_zone_sectors(dev->bdev) != *zone_sectors; } /* @@ -1634,16 +1629,16 @@ static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev * * zone sectors, if the destination device is a zoned block device, it shall * have the specified zone_sectors. 
*/ -static int validate_hardware_zoned_model(struct dm_table *table, +static int validate_hardware_zoned_model(struct dm_table *t, enum blk_zoned_model zoned_model, unsigned int zone_sectors) { if (zoned_model == BLK_ZONED_NONE) return 0; - if (!dm_table_supports_zoned_model(table, zoned_model)) { + if (!dm_table_supports_zoned_model(t, zoned_model)) { DMERR("%s: zoned model is not consistent across all devices", - dm_device_name(table->md)); + dm_device_name(t->md)); return -EINVAL; } @@ -1651,9 +1646,9 @@ static int validate_hardware_zoned_model(struct dm_table *table, if (!zone_sectors || !is_power_of_2(zone_sectors)) return -EINVAL; - if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) { + if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) { DMERR("%s: zone sectors is not consistent across all zoned devices", - dm_device_name(table->md)); + dm_device_name(t->md)); return -EINVAL; } @@ -1663,21 +1658,19 @@ static int validate_hardware_zoned_model(struct dm_table *table, /* * Establish the new table's queue_limits and validate them. */ -int dm_calculate_queue_limits(struct dm_table *table, +int dm_calculate_queue_limits(struct dm_table *t, struct queue_limits *limits) { - struct dm_target *ti; struct queue_limits ti_limits; - unsigned i; enum blk_zoned_model zoned_model = BLK_ZONED_NONE; unsigned int zone_sectors = 0; blk_set_stacking_limits(limits); - for (i = 0; i < dm_table_get_num_targets(table); i++) { - blk_set_stacking_limits(&ti_limits); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); - ti = dm_table_get_target(table, i); + blk_set_stacking_limits(&ti_limits); if (!ti->type->iterate_devices) goto combine_limits; @@ -1718,7 +1711,7 @@ combine_limits: DMWARN("%s: adding target device " "(start sect %llu len %llu) " "caused an alignment inconsistency", - dm_device_name(table->md), + dm_device_name(t->md), (unsigned long long) ti->begin, (unsigned long long) ti->len); } @@ -1738,10 +1731,10 @@ combine_limits: zoned_model = limits->zoned; zone_sectors = limits->chunk_sectors; } - if (validate_hardware_zoned_model(table, zoned_model, zone_sectors)) + if (validate_hardware_zoned_model(t, zoned_model, zone_sectors)) return -EINVAL; - return validate_hardware_logical_block_alignment(table, limits); + return validate_hardware_logical_block_alignment(t, limits); } /* @@ -1785,17 +1778,14 @@ static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) { - struct dm_target *ti; - unsigned i; - /* * Require at least one underlying device to support flushes. * t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets * supporting flushes must provide. 
*/ - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->num_flush_bios) continue; @@ -1849,11 +1839,8 @@ static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev * static bool dm_table_supports_write_zeroes(struct dm_table *t) { - struct dm_target *ti; - unsigned i = 0; - - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->num_write_zeroes_bios) return false; @@ -1876,11 +1863,8 @@ static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev, static bool dm_table_supports_nowait(struct dm_table *t) { - struct dm_target *ti; - unsigned i = 0; - - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!dm_target_supports_nowait(ti->type)) return false; @@ -1901,11 +1885,8 @@ static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev, static bool dm_table_supports_discards(struct dm_table *t) { - struct dm_target *ti; - unsigned i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->num_discard_bios) return false; @@ -1933,11 +1914,8 @@ static int device_not_secure_erase_capable(struct dm_target *ti, static bool dm_table_supports_secure_erase(struct dm_table *t) { - struct dm_target *ti; - unsigned int i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->num_secure_erase_bios) return false; @@ -2067,11 +2045,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, return 0; } -unsigned int dm_table_get_num_targets(struct dm_table *t) -{ - return t->num_targets; -} - struct list_head *dm_table_get_devices(struct dm_table *t) { return &t->devices; @@ -2091,12 +2064,11 @@ enum suspend_mode { static void suspend_targets(struct dm_table *t, enum suspend_mode mode) { - int i = t->num_targets; - struct dm_target *ti = t->targets; - lockdep_assert_held(&t->md->suspend_lock); - while (i--) { + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + switch (mode) { case PRESUSPEND: if (ti->type->presuspend) @@ -2111,7 +2083,6 @@ static void suspend_targets(struct dm_table *t, enum suspend_mode mode) ti->type->postsuspend(ti); break; } - ti++; } } @@ -2141,12 +2112,13 @@ void dm_table_postsuspend_targets(struct dm_table *t) int dm_table_resume_targets(struct dm_table *t) { - int i, r = 0; + unsigned int i; + int r = 0; lockdep_assert_held(&t->md->suspend_lock); for (i = 0; i < t->num_targets; i++) { - struct dm_target *ti = t->targets + i; + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->type->preresume) continue; @@ -2160,7 +2132,7 @@ int dm_table_resume_targets(struct dm_table *t) } for (i = 0; i < t->num_targets; i++) { - struct dm_target *ti = t->targets + i; + struct dm_target *ti = dm_table_get_target(t, i); if (ti->type->resume) ti->type->resume(ti); diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 
2db7030aba00..a27395c8621f 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_sm_threshold_fn fn, void *context) { - int r; + int r = -EINVAL; pmd_write_lock_in_core(pmd); - r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context); + if (!pmd->fail_io) { + r = dm_sm_register_threshold_callback(pmd->metadata_sm, + threshold, fn, context); + } pmd_write_unlock(pmd); return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 84c083f76673..e76c96c760a9 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) calc_metadata_threshold(pt), metadata_low_callback, pool); - if (r) + if (r) { + ti->error = "Error registering metadata threshold"; goto out_flags_changed; + } dm_pool_register_pre_commit_callback(pool->pmd, metadata_pre_commit_callback, pool); diff --git a/drivers/md/dm-verity-loadpin.c b/drivers/md/dm-verity-loadpin.c new file mode 100644 index 000000000000..387ec43aef72 --- /dev/null +++ b/drivers/md/dm-verity-loadpin.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/dm-verity-loadpin.h> + +#include "dm.h" +#include "dm-core.h" +#include "dm-verity.h" + +#define DM_MSG_PREFIX "verity-loadpin" + +LIST_HEAD(dm_verity_loadpin_trusted_root_digests); + +static bool is_trusted_verity_target(struct dm_target *ti) +{ + u8 *root_digest; + unsigned int digest_size; + struct dm_verity_loadpin_trusted_root_digest *trd; + bool trusted = false; + + if (!dm_is_verity_target(ti)) + return false; + + if (dm_verity_get_root_digest(ti, &root_digest, &digest_size)) + return false; + + list_for_each_entry(trd, &dm_verity_loadpin_trusted_root_digests, node) { + if ((trd->len == digest_size) && + !memcmp(trd->data, root_digest, digest_size)) { + trusted = true; + break; + } + } + + kfree(root_digest); + + return trusted; +} + +/* + * Determines whether the file system of a superblock is located on + * a verity device that is trusted by LoadPin. 
+ */ +bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev) +{ + struct mapped_device *md; + struct dm_table *table; + struct dm_target *ti; + int srcu_idx; + bool trusted = false; + + if (list_empty(&dm_verity_loadpin_trusted_root_digests)) + return false; + + md = dm_get_md(bdev->bd_dev); + if (!md) + return false; + + table = dm_get_live_table(md, &srcu_idx); + + if (table->num_targets != 1) + goto out; + + ti = dm_table_get_target(table, 0); + + if (is_trusted_verity_target(ti)) + trusted = true; + +out: + dm_put_live_table(md, srcu_idx); + dm_put(md); + + return trusted; +} diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index d6dbd47492a8..4fd853a56b1a 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/reboot.h> #include <linux/scatterlist.h> +#include <linux/string.h> #define DM_MSG_PREFIX "verity" @@ -527,11 +528,10 @@ static int verity_verify_io(struct dm_verity_io *io) if (v->validated_blocks) set_bit(cur_block, v->validated_blocks); continue; - } - else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, - cur_block, NULL, &start) == 0) + } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, + cur_block, NULL, &start) == 0) { continue; - else { + } else { if (bio->bi_status) { /* * Error correction failed; Just return error @@ -1310,10 +1310,40 @@ bad: return r; } +/* + * Check whether a DM target is a verity target. + */ +bool dm_is_verity_target(struct dm_target *ti) +{ + return ti->type->module == THIS_MODULE; +} + +/* + * Get the root digest of a verity target. + * + * Returns a copy of the root digest, the caller is responsible for + * freeing the memory of the digest. + */ +int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, unsigned int *digest_size) +{ + struct dm_verity *v = ti->private; + + if (!dm_is_verity_target(ti)) + return -EINVAL; + + *root_digest = kmemdup(v->root_digest, v->digest_size, GFP_KERNEL); + if (*root_digest == NULL) + return -ENOMEM; + + *digest_size = v->digest_size; + + return 0; +} + static struct target_type verity_target = { .name = "verity", .features = DM_TARGET_IMMUTABLE, - .version = {1, 8, 0}, + .version = {1, 8, 1}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 4e769d13473a..c832cc3e3d24 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -129,4 +129,8 @@ extern int verity_hash(struct dm_verity *v, struct ahash_request *req, extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); +extern bool dm_is_verity_target(struct dm_target *ti); +extern int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, + unsigned int *digest_size); + #endif /* DM_VERITY_H */ diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index d74c5a7a0ab4..1fc161d65673 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -22,7 +22,7 @@ #define HIGH_WATERMARK 50 #define LOW_WATERMARK 45 -#define MAX_WRITEBACK_JOBS 0 +#define MAX_WRITEBACK_JOBS min(0x10000000 / PAGE_SIZE, totalram_pages() / 16) #define ENDIO_LATENCY 16 #define WRITEBACK_LATENCY 64 #define AUTOCOMMIT_BLOCKS_SSD 65536 @@ -523,8 +523,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) region.sector += wc->start_sector; atomic_inc(&endio.count); - req.bi_op = REQ_OP_WRITE; - req.bi_op_flags = REQ_SYNC; + 
req.bi_opf = REQ_OP_WRITE | REQ_SYNC; req.mem.type = DM_IO_VMA; req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY; req.client = wc->dm_io; @@ -562,8 +561,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) region.sector += wc->start_sector; - req.bi_op = REQ_OP_WRITE; - req.bi_op_flags = REQ_SYNC | REQ_FUA; + req.bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA; req.mem.type = DM_IO_VMA; req.mem.ptr.vma = (char *)wc->memory_map; req.client = wc->dm_io; @@ -592,8 +590,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) region.bdev = dev->bdev; region.sector = 0; region.count = 0; - req.bi_op = REQ_OP_WRITE; - req.bi_op_flags = REQ_PREFLUSH; + req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; req.mem.type = DM_IO_KMEM; req.mem.ptr.addr = NULL; req.client = wc->dm_io; @@ -981,8 +978,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors region.bdev = wc->ssd_dev->bdev; region.sector = wc->start_sector; region.count = n_sectors; - req.bi_op = REQ_OP_READ; - req.bi_op_flags = REQ_SYNC; + req.bi_opf = REQ_OP_READ | REQ_SYNC; req.mem.type = DM_IO_VMA; req.mem.ptr.vma = (char *)wc->memory_map; req.client = wc->dm_io; @@ -1329,8 +1325,8 @@ enum wc_map_op { WC_MAP_ERROR, }; -static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, - struct wc_entry *e) +static void writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e) { if (e) { sector_t next_boundary = @@ -1338,8 +1334,6 @@ static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, stru if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) dm_accept_partial_bio(bio, next_boundary); } - - return WC_MAP_REMAP_ORIGIN; } static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio) @@ -1366,14 +1360,16 @@ read_next_block: map_op = WC_MAP_REMAP; } } else { - map_op = writecache_map_remap_origin(wc, bio, e); + writecache_map_remap_origin(wc, bio, e); + wc->stats.reads += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits; + map_op = WC_MAP_REMAP_ORIGIN; } return map_op; } -static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, - struct wc_entry *e, bool search_used) +static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e, bool search_used) { unsigned bio_size = wc->block_size; sector_t start_cache_sec = cache_sector(wc, e); @@ -1413,14 +1409,15 @@ static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct b bio->bi_iter.bi_sector = start_cache_sec; dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); + wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; + wc->stats.writes_allocate += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits; + if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { wc->uncommitted_blocks = 0; queue_work(wc->writeback_wq, &wc->flush_work); } else { writecache_schedule_autocommit(wc); } - - return WC_MAP_REMAP; } static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio) @@ -1430,9 +1427,10 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio do { bool found_entry = false; bool search_used = false; - wc->stats.writes++; - if (writecache_has_error(wc)) + if (writecache_has_error(wc)) { + wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; return WC_MAP_ERROR; + } e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 
0); if (e) { if (!writecache_entry_is_committed(wc, e)) { @@ -1456,9 +1454,11 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio if (unlikely(!e)) { if (!WC_MODE_PMEM(wc) && !found_entry) { direct_write: - wc->stats.writes_around++; e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); - return writecache_map_remap_origin(wc, bio, e); + writecache_map_remap_origin(wc, bio, e); + wc->stats.writes_around += bio->bi_iter.bi_size >> wc->block_size_bits; + wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; + return WC_MAP_REMAP_ORIGIN; } wc->stats.writes_blocked_on_freelist++; writecache_wait_on_freelist(wc); @@ -1469,10 +1469,13 @@ direct_write: wc->uncommitted_blocks++; wc->stats.writes_allocate++; bio_copy: - if (WC_MODE_PMEM(wc)) + if (WC_MODE_PMEM(wc)) { bio_copy_block(wc, bio, memory_data(wc, e)); - else - return writecache_bio_copy_ssd(wc, bio, e, search_used); + wc->stats.writes++; + } else { + writecache_bio_copy_ssd(wc, bio, e, search_used); + return WC_MAP_REMAP; + } } while (bio->bi_iter.bi_size); if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks)) @@ -1507,7 +1510,7 @@ static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio) { - wc->stats.discards++; + wc->stats.discards += bio->bi_iter.bi_size >> wc->block_size_bits; if (writecache_has_error(wc)) return WC_MAP_ERROR; diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 3e7b1fe1580b..3dafc0e8b7a9 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -139,13 +139,11 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) void dm_cleanup_zoned_dev(struct mapped_device *md) { - struct request_queue *q = md->queue; - - if (q) { - kfree(q->conv_zones_bitmap); - q->conv_zones_bitmap = NULL; - kfree(q->seq_zones_wlock); - q->seq_zones_wlock = NULL; + if (md->disk) { + kfree(md->disk->conv_zones_bitmap); + md->disk->conv_zones_bitmap = NULL; + kfree(md->disk->seq_zones_wlock); + md->disk->seq_zones_wlock = NULL; } kvfree(md->zwp_offset); @@ -179,31 +177,31 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct mapped_device *md = data; - struct request_queue *q = md->queue; + struct gendisk *disk = md->disk; switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: - if (!q->conv_zones_bitmap) { - q->conv_zones_bitmap = - kcalloc(BITS_TO_LONGS(q->nr_zones), + if (!disk->conv_zones_bitmap) { + disk->conv_zones_bitmap = + kcalloc(BITS_TO_LONGS(disk->nr_zones), sizeof(unsigned long), GFP_NOIO); - if (!q->conv_zones_bitmap) + if (!disk->conv_zones_bitmap) return -ENOMEM; } - set_bit(idx, q->conv_zones_bitmap); + set_bit(idx, disk->conv_zones_bitmap); break; case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: - if (!q->seq_zones_wlock) { - q->seq_zones_wlock = - kcalloc(BITS_TO_LONGS(q->nr_zones), + if (!disk->seq_zones_wlock) { + disk->seq_zones_wlock = + kcalloc(BITS_TO_LONGS(disk->nr_zones), sizeof(unsigned long), GFP_NOIO); - if (!q->seq_zones_wlock) + if (!disk->seq_zones_wlock) return -ENOMEM; } if (!md->zwp_offset) { md->zwp_offset = - kvcalloc(q->nr_zones, sizeof(unsigned int), + kvcalloc(disk->nr_zones, sizeof(unsigned int), GFP_KERNEL); if (!md->zwp_offset) return -ENOMEM; @@ -228,7 +226,7 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx, */ static int dm_revalidate_zones(struct mapped_device *md, struct 
dm_table *t) { - struct request_queue *q = md->queue; + struct gendisk *disk = md->disk; unsigned int noio_flag; int ret; @@ -236,7 +234,7 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) * Check if something changed. If yes, cleanup the current resources * and reallocate everything. */ - if (!q->nr_zones || q->nr_zones != md->nr_zones) + if (!disk->nr_zones || disk->nr_zones != md->nr_zones) dm_cleanup_zoned_dev(md); if (md->nr_zones) return 0; @@ -246,17 +244,17 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) * operations in this context are done as if GFP_NOIO was specified. */ noio_flag = memalloc_noio_save(); - ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones, + ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones, dm_zone_revalidate_cb, md); memalloc_noio_restore(noio_flag); if (ret < 0) goto err; - if (ret != q->nr_zones) { + if (ret != disk->nr_zones) { ret = -EIO; goto err; } - md->nr_zones = q->nr_zones; + md->nr_zones = disk->nr_zones; return 0; @@ -270,16 +268,13 @@ static int device_not_zone_append_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - return !blk_queue_is_zoned(bdev_get_queue(dev->bdev)); + return !bdev_is_zoned(dev->bdev); } static bool dm_table_supports_zone_append(struct dm_table *t) { - struct dm_target *ti; - unsigned int i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (ti->emulate_zone_append) return false; @@ -301,7 +296,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q) * correct value to be exposed in sysfs queue/nr_zones. */ WARN_ON_ONCE(queue_is_mq(q)); - q->nr_zones = blkdev_nr_zones(md->disk); + md->disk->nr_zones = bdev_nr_zones(md->disk->part0); /* Check if zone append is natively supported */ if (dm_table_supports_zone_append(t)) { @@ -334,7 +329,7 @@ static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx, static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, unsigned int *wp_ofst) { - sector_t sector = zno * blk_queue_zone_sectors(md->queue); + sector_t sector = zno * bdev_zone_sectors(md->disk->part0); unsigned int noio_flag; struct dm_table *t; int srcu_idx, ret; @@ -361,7 +356,7 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, } struct orig_bio_details { - unsigned int op; + enum req_op op; unsigned int nr_sectors; }; @@ -373,7 +368,7 @@ struct orig_bio_details { static bool dm_zone_map_bio_begin(struct mapped_device *md, unsigned int zno, struct bio *clone) { - sector_t zsectors = blk_queue_zone_sectors(md->queue); + sector_t zsectors = bdev_zone_sectors(md->disk->part0); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* @@ -443,7 +438,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int z return BLK_STS_OK; case REQ_OP_ZONE_FINISH: WRITE_ONCE(md->zwp_offset[zno], - blk_queue_zone_sectors(md->queue)); + bdev_zone_sectors(md->disk->part0)); return BLK_STS_OK; case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE: @@ -466,26 +461,26 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int z } } -static inline void dm_zone_lock(struct request_queue *q, - unsigned int zno, struct bio *clone) +static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno, + struct bio *clone) { if 
(WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))) return; - wait_on_bit_lock_io(q->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE); + wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE); bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED); } -static inline void dm_zone_unlock(struct request_queue *q, - unsigned int zno, struct bio *clone) +static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno, + struct bio *clone) { if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)) return; - WARN_ON_ONCE(!test_bit(zno, q->seq_zones_wlock)); - clear_bit_unlock(zno, q->seq_zones_wlock); + WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock)); + clear_bit_unlock(zno, disk->seq_zones_wlock); smp_mb__after_atomic(); - wake_up_bit(q->seq_zones_wlock, zno); + wake_up_bit(disk->seq_zones_wlock, zno); bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED); } @@ -520,7 +515,6 @@ int dm_zone_map_bio(struct dm_target_io *tio) struct dm_io *io = tio->io; struct dm_target *ti = tio->ti; struct mapped_device *md = io->md; - struct request_queue *q = md->queue; struct bio *clone = &tio->clone; struct orig_bio_details orig_bio_details; unsigned int zno; @@ -536,7 +530,7 @@ int dm_zone_map_bio(struct dm_target_io *tio) /* Lock the target zone */ zno = bio_zone_no(clone); - dm_zone_lock(q, zno, clone); + dm_zone_lock(md->disk, zno, clone); orig_bio_details.nr_sectors = bio_sectors(clone); orig_bio_details.op = bio_op(clone); @@ -546,7 +540,7 @@ int dm_zone_map_bio(struct dm_target_io *tio) * both valid, and if the bio is a zone append, remap it to a write. */ if (!dm_zone_map_bio_begin(md, zno, clone)) { - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(md->disk, zno, clone); return DM_MAPIO_KILL; } @@ -570,12 +564,12 @@ int dm_zone_map_bio(struct dm_target_io *tio) sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, *tio->len_ptr); if (sts != BLK_STS_OK) - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(md->disk, zno, clone); break; case DM_MAPIO_REQUEUE: case DM_MAPIO_KILL: default: - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(md->disk, zno, clone); sts = BLK_STS_IOERR; break; } @@ -592,7 +586,7 @@ int dm_zone_map_bio(struct dm_target_io *tio) void dm_zone_endio(struct dm_io *io, struct bio *clone) { struct mapped_device *md = io->md; - struct request_queue *q = md->queue; + struct gendisk *disk = md->disk; struct bio *orig_bio = io->orig_bio; unsigned int zwp_offset; unsigned int zno; @@ -608,7 +602,8 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) */ if (clone->bi_status == BLK_STS_OK && bio_op(clone) == REQ_OP_ZONE_APPEND) { - sector_t mask = (sector_t)blk_queue_zone_sectors(q) - 1; + sector_t mask = + (sector_t)bdev_zone_sectors(disk->part0) - 1; orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask; @@ -649,5 +644,5 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) zwp_offset - bio_sectors(orig_bio); } - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(disk, zno, clone); } diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index d1ea66114d14..34db364c23a8 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -737,7 +737,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, /* * Read/write a metadata block. 
*/ -static int dmz_rdwr_block(struct dmz_dev *dev, int op, +static int dmz_rdwr_block(struct dmz_dev *dev, enum req_op op, sector_t block, struct page *page) { struct bio *bio; @@ -2045,7 +2045,8 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, * allocated and used to map the chunk. * The zone returned will be set to the active state. */ -struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk, int op) +struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, + unsigned int chunk, enum req_op op) { struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT]; struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data; diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 0ec5d8b9b1a4..95b132b52f33 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -764,8 +764,7 @@ static void dmz_put_zoned_device(struct dm_target *ti) static int dmz_fixup_devices(struct dm_target *ti) { struct dmz_target *dmz = ti->private; - struct dmz_dev *reg_dev, *zoned_dev; - struct request_queue *q; + struct dmz_dev *reg_dev = NULL; sector_t zone_nr_sectors = 0; int i; @@ -780,32 +779,32 @@ static int dmz_fixup_devices(struct dm_target *ti) return -EINVAL; } for (i = 1; i < dmz->nr_ddevs; i++) { - zoned_dev = &dmz->dev[i]; + struct dmz_dev *zoned_dev = &dmz->dev[i]; + struct block_device *bdev = zoned_dev->bdev; + if (zoned_dev->flags & DMZ_BDEV_REGULAR) { ti->error = "Secondary disk is not a zoned device"; return -EINVAL; } - q = bdev_get_queue(zoned_dev->bdev); if (zone_nr_sectors && - zone_nr_sectors != blk_queue_zone_sectors(q)) { + zone_nr_sectors != bdev_zone_sectors(bdev)) { ti->error = "Zone nr sectors mismatch"; return -EINVAL; } - zone_nr_sectors = blk_queue_zone_sectors(q); + zone_nr_sectors = bdev_zone_sectors(bdev); zoned_dev->zone_nr_sectors = zone_nr_sectors; - zoned_dev->nr_zones = - blkdev_nr_zones(zoned_dev->bdev->bd_disk); + zoned_dev->nr_zones = bdev_nr_zones(bdev); } } else { - reg_dev = NULL; - zoned_dev = &dmz->dev[0]; + struct dmz_dev *zoned_dev = &dmz->dev[0]; + struct block_device *bdev = zoned_dev->bdev; + if (zoned_dev->flags & DMZ_BDEV_REGULAR) { ti->error = "Disk is not a zoned device"; return -EINVAL; } - q = bdev_get_queue(zoned_dev->bdev); - zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q); - zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk); + zoned_dev->zone_nr_sectors = bdev_zone_sectors(bdev); + zoned_dev->nr_zones = bdev_nr_zones(bdev); } if (reg_dev) { diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index a02744a0846c..265494d3f711 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -248,7 +248,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, unsigned int dev_idx, bool idle); struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, - unsigned int chunk, int op); + unsigned int chunk, enum req_op op); void dmz_put_chunk_mapping(struct dmz_metadata *zmd, struct dm_zone *zone); struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd, struct dm_zone *dzone); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d8f16183bf27..99642f69bfa7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -88,10 +88,6 @@ struct clone_info { bool submit_as_polled:1; }; -#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone)) -#define DM_IO_BIO_OFFSET \ - (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio)) - static inline struct dm_target_io *clone_to_tio(struct bio 
*clone) { return container_of(clone, struct dm_target_io, clone); @@ -415,7 +411,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx, struct block_device **bdev) { - struct dm_target *tgt; + struct dm_target *ti; struct dm_table *map; int r; @@ -426,17 +422,17 @@ retry: return r; /* We only support devices that have a single target */ - if (dm_table_get_num_targets(map) != 1) + if (map->num_targets != 1) return r; - tgt = dm_table_get_target(map, 0); - if (!tgt->type->prepare_ioctl) + ti = dm_table_get_target(map, 0); + if (!ti->type->prepare_ioctl) return r; if (dm_suspended_md(md)) return -EAGAIN; - r = tgt->type->prepare_ioctl(tgt, bdev); + r = ti->type->prepare_ioctl(ti, bdev); if (r == -ENOTCONN && !fatal_signal_pending(current)) { dm_put_live_table(md, *srcu_idx); msleep(10); @@ -555,6 +551,10 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone) unsigned long flags; /* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */ spin_lock_irqsave(&io->lock, flags); + if (dm_io_flagged(io, DM_IO_ACCOUNTED)) { + spin_unlock_irqrestore(&io->lock, flags); + return; + } dm_io_set_flag(io, DM_IO_ACCOUNTED); spin_unlock_irqrestore(&io->lock, flags); } @@ -574,9 +574,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) struct bio *clone; clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs); - /* Set default bdev, but target must bio_set_dev() before issuing IO */ - clone->bi_bdev = md->disk->part0; - tio = clone_to_tio(clone); tio->flags = 0; dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO); @@ -609,6 +606,7 @@ static void free_io(struct dm_io *io) static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask) { + struct mapped_device *md = ci->io->md; struct dm_target_io *tio; struct bio *clone; @@ -618,14 +616,10 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, /* alloc_io() already initialized embedded clone */ clone = &tio->clone; } else { - struct mapped_device *md = ci->io->md; - clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, &md->mempools->bs); if (!clone) return NULL; - /* Set default bdev, but target must bio_set_dev() before issuing IO */ - clone->bi_bdev = md->disk->part0; /* REQ_DM_POLL_LIST shouldn't be inherited */ clone->bi_opf &= ~REQ_DM_POLL_LIST; @@ -641,6 +635,11 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, tio->len_ptr = len; tio->old_sector = 0; + /* Set default bdev, but target must bio_set_dev() before issuing IO */ + clone->bi_bdev = md->disk->part0; + if (unlikely(ti->needs_bio_set_dev)) + bio_set_dev(clone, md->disk->part0); + if (len) { clone->bi_iter.bi_size = to_bytes(*len); if (bio_integrity(clone)) @@ -711,18 +710,18 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) } static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md, - int *srcu_idx, struct bio *bio) + int *srcu_idx, blk_opf_t bio_opf) { - if (bio->bi_opf & REQ_NOWAIT) + if (bio_opf & REQ_NOWAIT) return dm_get_live_table_fast(md); else return dm_get_live_table(md, srcu_idx); } static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx, - struct bio *bio) + blk_opf_t bio_opf) { - if (bio->bi_opf & REQ_NOWAIT) + if (bio_opf & REQ_NOWAIT) dm_put_live_table_fast(md); else dm_put_live_table(md, srcu_idx); @@ -879,22 +878,63 @@ static int __noflush_suspending(struct 
mapped_device *md) return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); } -static void dm_io_complete(struct dm_io *io) +static void dm_requeue_add_io(struct dm_io *io, bool first_stage) { - blk_status_t io_error; struct mapped_device *md = io->md; + + if (first_stage) { + struct dm_io *next = md->requeue_list; + + md->requeue_list = io; + io->next = next; + } else { + bio_list_add_head(&md->deferred, io->orig_bio); + } +} + +static void dm_kick_requeue(struct mapped_device *md, bool first_stage) +{ + if (first_stage) + queue_work(md->wq, &md->requeue_work); + else + queue_work(md->wq, &md->work); +} + +/* + * Return true if the dm_io's original bio is requeued. + * io->status is updated with error if requeue disallowed. + */ +static bool dm_handle_requeue(struct dm_io *io, bool first_stage) +{ struct bio *bio = io->orig_bio; + bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE); + bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) && + (bio->bi_opf & REQ_POLLED)); + struct mapped_device *md = io->md; + bool requeued = false; - if (io->status == BLK_STS_DM_REQUEUE) { + if (handle_requeue || handle_polled_eagain) { unsigned long flags; + + if (bio->bi_opf & REQ_POLLED) { + /* + * Upper layer won't help us poll split bio + * (io->orig_bio may only reflect a subset of the + * pre-split original) so clear REQ_POLLED. + */ + bio_clear_polled(bio); + } + /* - * Target requested pushing back the I/O. + * Target requested pushing back the I/O or + * polled IO hit BLK_STS_AGAIN. */ spin_lock_irqsave(&md->deferred_lock, flags); - if (__noflush_suspending(md) && - !WARN_ON_ONCE(dm_is_zone_write(md, bio))) { - /* NOTE early return due to BLK_STS_DM_REQUEUE below */ - bio_list_add_head(&md->deferred, bio); + if ((__noflush_suspending(md) && + !WARN_ON_ONCE(dm_is_zone_write(md, bio))) || + handle_polled_eagain || first_stage) { + dm_requeue_add_io(io, first_stage); + requeued = true; } else { /* * noflush suspend was interrupted or this is @@ -905,6 +945,23 @@ static void dm_io_complete(struct dm_io *io) spin_unlock_irqrestore(&md->deferred_lock, flags); } + if (requeued) + dm_kick_requeue(md, first_stage); + + return requeued; +} + +static void __dm_io_complete(struct dm_io *io, bool first_stage) +{ + struct bio *bio = io->orig_bio; + struct mapped_device *md = io->md; + blk_status_t io_error; + bool requeued; + + requeued = dm_handle_requeue(io, first_stage); + if (requeued && first_stage) + return; + io_error = io->status; if (dm_io_flagged(io, DM_IO_ACCOUNTED)) dm_end_io_acct(io); @@ -924,21 +981,9 @@ static void dm_io_complete(struct dm_io *io) if (unlikely(wq_has_sleeper(&md->wait))) wake_up(&md->wait); - if (io_error == BLK_STS_DM_REQUEUE || io_error == BLK_STS_AGAIN) { - if (bio->bi_opf & REQ_POLLED) { - /* - * Upper layer won't help us poll split bio (io->orig_bio - * may only reflect a subset of the pre-split original) - * so clear REQ_POLLED in case of requeue. 
- */ - bio_clear_polled(bio); - if (io_error == BLK_STS_AGAIN) { - /* io_uring doesn't handle BLK_STS_AGAIN (yet) */ - queue_io(md, bio); - } - } + /* Return early if the original bio was requeued */ + if (requeued) return; - } if (bio_is_flush_with_data(bio)) { /* @@ -955,6 +1000,58 @@ static void dm_io_complete(struct dm_io *io) } } +static void dm_wq_requeue_work(struct work_struct *work) +{ + struct mapped_device *md = container_of(work, struct mapped_device, + requeue_work); + unsigned long flags; + struct dm_io *io; + + /* reuse deferred lock to simplify dm_handle_requeue */ + spin_lock_irqsave(&md->deferred_lock, flags); + io = md->requeue_list; + md->requeue_list = NULL; + spin_unlock_irqrestore(&md->deferred_lock, flags); + + while (io) { + struct dm_io *next = io->next; + + dm_io_rewind(io, &md->queue->bio_split); + + io->next = NULL; + __dm_io_complete(io, false); + io = next; + } +} + +/* + * Two staged requeue: + * + * 1) io->orig_bio points to the real original bio, and the part mapped to + * this io must be requeued, instead of other parts of the original bio. + * + * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io. + */ +static void dm_io_complete(struct dm_io *io) +{ + bool first_requeue; + + /* + * Only dm_io that has been split needs two stage requeue, otherwise + * we may run into long bio clone chain during suspend and OOM could + * be triggered. + * + * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they + * also aren't handled via the first stage requeue. + */ + if (dm_io_flagged(io, DM_IO_WAS_SPLIT)) + first_requeue = true; + else + first_requeue = false; + + __dm_io_complete(io, first_requeue); +} + /* * Decrements the number of outstanding ios that a bio has been * cloned into, completing the original io if necc. @@ -1026,7 +1123,7 @@ static void clone_endio(struct bio *bio) } if (static_branch_unlikely(&zoned_enabled) && - unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev)))) + unlikely(bdev_is_zoned(bio->bi_bdev))) dm_zone_endio(io, bio); if (endio) { @@ -1079,23 +1176,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector) { sector_t target_offset = dm_target_offset(ti, sector); sector_t len = max_io_len_target_boundary(ti, target_offset); - sector_t max_len; /* * Does the target need to split IO even further? * - varied (per target) IO splitting is a tenet of DM; this * explains why stacked chunk_sectors based splitting via - * blk_max_size_offset() isn't possible here. So pass in - * ti->max_io_len to override stacked chunk_sectors. + * blk_queue_split() isn't possible here. */ - if (ti->max_io_len) { - max_len = blk_max_size_offset(ti->table->md->queue, - target_offset, ti->max_io_len); - if (len > max_len) - len = max_len; - } - - return len; + if (!ti->max_io_len) + return len; + return min_t(sector_t, len, + min(queue_max_sectors(ti->table->md->queue), + blk_chunk_sectors_left(target_offset, ti->max_io_len))); } int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) @@ -1238,6 +1330,7 @@ out: void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) { struct dm_target_io *tio = clone_to_tio(bio); + struct dm_io *io = tio->io; unsigned bio_sectors = bio_sectors(bio); BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO)); @@ -1253,8 +1346,9 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) * __split_and_process_bio() may have already saved mapped part * for accounting but it is being reduced so update accordingly. 
*/ - dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT); - tio->io->sectors = n_sectors; + dm_io_set_flag(io, DM_IO_WAS_SPLIT); + io->sectors = n_sectors; + io->sector_offset = bio_sectors(io->orig_bio); } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); @@ -1377,17 +1471,7 @@ static void setup_split_accounting(struct clone_info *ci, unsigned len) */ dm_io_set_flag(io, DM_IO_WAS_SPLIT); io->sectors = len; - } - - if (static_branch_unlikely(&stats_enabled) && - unlikely(dm_stats_used(&io->md->stats))) { - /* - * Save bi_sector in terms of its offset from end of - * original bio, only needed for DM-stats' benefit. - * - saved regardless of whether split needed so that - * dm_accept_partial_bio() doesn't need to. - */ - io->sector_offset = bio_end_sector(ci->bio) - ci->sector; + io->sector_offset = bio_sectors(ci->bio); } } @@ -1421,11 +1505,11 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, } static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios, unsigned *len) + unsigned int num_bios, unsigned *len) { struct bio_list blist = BIO_EMPTY_LIST; struct bio *clone; - int ret = 0; + unsigned int ret = 0; switch (num_bios) { case 0: @@ -1453,8 +1537,7 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, static void __send_empty_flush(struct clone_info *ci) { - unsigned target_nr = 0; - struct dm_target *ti; + struct dm_table *t = ci->map; struct bio flush_bio; /* @@ -1469,8 +1552,9 @@ static void __send_empty_flush(struct clone_info *ci) ci->sector_count = 0; ci->io->tio.clone.bi_iter.bi_size = 0; - while ((ti = dm_table_get_target(ci->map, target_nr++))) { - int bios; + for (unsigned int i = 0; i < t->num_targets; i++) { + unsigned int bios; + struct dm_target *ti = dm_table_get_target(t, i); atomic_add(ti->num_flush_bios, &ci->io->io_count); bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); @@ -1490,7 +1574,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target unsigned num_bios) { unsigned len; - int bios; + unsigned int bios; len = min_t(sector_t, ci->sector_count, max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector))); @@ -1509,7 +1593,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target static bool is_abnormal_io(struct bio *bio) { - unsigned int op = bio_op(bio); + enum req_op op = bio_op(bio); if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) { switch (op) { @@ -1540,6 +1624,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, case REQ_OP_WRITE_ZEROES: num_bios = ti->num_write_zeroes_bios; break; + default: + break; } /* @@ -1609,14 +1695,19 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) ti = dm_table_find_target(ci->map, ci->sector); if (unlikely(!ti)) return BLK_STS_IOERR; - else if (unlikely(ci->is_abnormal_io)) + + if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) && + unlikely(!dm_target_supports_nowait(ti->type))) + return BLK_STS_NOTSUPP; + + if (unlikely(ci->is_abnormal_io)) return __process_abnormal_io(ci, ti); /* * Only support bio polling for normal IO, and the target io is * exactly inside the dm_io instance (verified in dm_poll_dm_io) */ - ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED; + ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED); len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); setup_split_accounting(ci, len); @@ -1711,8 +1802,9 @@ static void dm_submit_bio(struct bio *bio) struct mapped_device *md = 
bio->bi_bdev->bd_disk->private_data; int srcu_idx; struct dm_table *map; + blk_opf_t bio_opf = bio->bi_opf; - map = dm_get_live_table_bio(md, &srcu_idx, bio); + map = dm_get_live_table_bio(md, &srcu_idx, bio_opf); /* If suspended, or map not yet available, queue this IO for later */ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || @@ -1728,7 +1820,7 @@ static void dm_submit_bio(struct bio *bio) dm_split_and_process_bio(md, map, bio); out: - dm_put_live_table_bio(md, srcu_idx, bio); + dm_put_live_table_bio(md, srcu_idx, bio_opf); } static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob, @@ -1884,7 +1976,7 @@ static void cleanup_mapped_device(struct mapped_device *md) del_gendisk(md->disk); } dm_queue_destroy_crypto_profile(md->queue); - blk_cleanup_disk(md->disk); + put_disk(md->disk); } if (md->pending_io) { @@ -1959,9 +2051,11 @@ static struct mapped_device *alloc_dev(int minor) init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); + INIT_WORK(&md->requeue_work, dm_wq_requeue_work); init_waitqueue_head(&md->eventq); init_completion(&md->kobj_holder.completion); + md->requeue_list = NULL; md->swap_bios = get_swap_bios(); sema_init(&md->swap_bios_semaphore, md->swap_bios); mutex_init(&md->swap_bios_lock); @@ -2968,54 +3062,6 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type, - unsigned per_io_data_size, unsigned min_pool_size, - bool integrity, bool poll) -{ - struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id); - unsigned int pool_size = 0; - unsigned int front_pad, io_front_pad; - int ret; - - if (!pools) - return NULL; - - switch (type) { - case DM_TYPE_BIO_BASED: - case DM_TYPE_DAX_BIO_BASED: - pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); - front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET; - io_front_pad = roundup(per_io_data_size, __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET; - ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, poll ? 
BIOSET_PERCPU_CACHE : 0); - if (ret) - goto out; - if (integrity && bioset_integrity_create(&pools->io_bs, pool_size)) - goto out; - break; - case DM_TYPE_REQUEST_BASED: - pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size); - front_pad = offsetof(struct dm_rq_clone_bio_info, clone); - /* per_io_data_size is used for blk-mq pdu at queue allocation */ - break; - default: - BUG(); - } - - ret = bioset_init(&pools->bs, pool_size, front_pad, 0); - if (ret) - goto out; - - if (integrity && bioset_integrity_create(&pools->bs, pool_size)) - goto out; - - return pools; - -out: - dm_free_md_mempools(pools); - - return NULL; -} - void dm_free_md_mempools(struct dm_md_mempools *pools) { if (!pools) @@ -3031,11 +3077,14 @@ struct dm_pr { u64 old_key; u64 new_key; u32 flags; + bool abort; bool fail_early; + int ret; + enum pr_type type; }; static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, - void *data) + struct dm_pr *pr) { struct mapped_device *md = bdev->bd_disk->private_data; struct dm_table *table; @@ -3047,15 +3096,21 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, goto out; /* We only support devices that have a single target */ - if (dm_table_get_num_targets(table) != 1) + if (table->num_targets != 1) goto out; ti = dm_table_get_target(table, 0); + if (dm_suspended_md(md)) { + ret = -EAGAIN; + goto out; + } + ret = -EINVAL; if (!ti->type->iterate_devices) goto out; - ret = ti->type->iterate_devices(ti, fn, data); + ti->type->iterate_devices(ti, fn, pr); + ret = 0; out: dm_put_live_table(md, srcu_idx); return ret; @@ -3069,10 +3124,24 @@ static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev, { struct dm_pr *pr = data; const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; + int ret; + + if (!ops || !ops->pr_register) { + pr->ret = -EOPNOTSUPP; + return -1; + } + + ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags); + if (!ret) + return 0; + + if (!pr->ret) + pr->ret = ret; + + if (pr->fail_early) + return -1; - if (!ops || !ops->pr_register) - return -EOPNOTSUPP; - return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags); + return 0; } static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, @@ -3083,82 +3152,145 @@ static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, .new_key = new_key, .flags = flags, .fail_early = true, + .ret = 0, }; int ret; ret = dm_call_pr(bdev, __dm_pr_register, &pr); - if (ret && new_key) { - /* unregister all paths if we failed to register any path */ - pr.old_key = new_key; - pr.new_key = 0; - pr.flags = 0; - pr.fail_early = false; - dm_call_pr(bdev, __dm_pr_register, &pr); + if (ret) { + /* Didn't even get to register a path */ + return ret; } + if (!pr.ret) + return 0; + ret = pr.ret; + + if (!new_key) + return ret; + + /* unregister all paths if we failed to register any path */ + pr.old_key = new_key; + pr.new_key = 0; + pr.flags = 0; + pr.fail_early = false; + (void) dm_call_pr(bdev, __dm_pr_register, &pr); return ret; } + +static int __dm_pr_reserve(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct dm_pr *pr = data; + const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; + + if (!ops || !ops->pr_reserve) { + pr->ret = -EOPNOTSUPP; + return -1; + } + + pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags); + if (!pr->ret) + return -1; + + return 0; +} + static int dm_pr_reserve(struct block_device *bdev, u64 key, 
 			 enum pr_type type, u32 flags)
 {
-	struct mapped_device *md = bdev->bd_disk->private_data;
-	const struct pr_ops *ops;
-	int r, srcu_idx;
+	struct dm_pr pr = {
+		.old_key	= key,
+		.flags		= flags,
+		.type		= type,
+		.fail_early	= false,
+		.ret		= 0,
+	};
+	int ret;
 
-	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
-	if (r < 0)
-		goto out;
+	ret = dm_call_pr(bdev, __dm_pr_reserve, &pr);
+	if (ret)
+		return ret;
 
-	ops = bdev->bd_disk->fops->pr_ops;
-	if (ops && ops->pr_reserve)
-		r = ops->pr_reserve(bdev, key, type, flags);
-	else
-		r = -EOPNOTSUPP;
-out:
-	dm_unprepare_ioctl(md, srcu_idx);
-	return r;
+	return pr.ret;
+}
+
+/*
+ * If there is a non-All Registrants type of reservation, the release must be
+ * sent down the holding path. For the cases where there is no reservation or
+ * the path is not the holder the device will also return success, so we must
+ * try each path to make sure we got the correct path.
+ */
+static int __dm_pr_release(struct dm_target *ti, struct dm_dev *dev,
+			   sector_t start, sector_t len, void *data)
+{
+	struct dm_pr *pr = data;
+	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+	if (!ops || !ops->pr_release) {
+		pr->ret = -EOPNOTSUPP;
+		return -1;
+	}
+
+	pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type);
+	if (pr->ret)
+		return -1;
+
+	return 0;
 }
 
 static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 {
-	struct mapped_device *md = bdev->bd_disk->private_data;
-	const struct pr_ops *ops;
-	int r, srcu_idx;
+	struct dm_pr pr = {
+		.old_key	= key,
+		.type		= type,
+		.fail_early	= false,
+	};
+	int ret;
 
-	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
-	if (r < 0)
-		goto out;
+	ret = dm_call_pr(bdev, __dm_pr_release, &pr);
+	if (ret)
+		return ret;
 
-	ops = bdev->bd_disk->fops->pr_ops;
-	if (ops && ops->pr_release)
-		r = ops->pr_release(bdev, key, type);
-	else
-		r = -EOPNOTSUPP;
-out:
-	dm_unprepare_ioctl(md, srcu_idx);
-	return r;
+	return pr.ret;
+}
+
+static int __dm_pr_preempt(struct dm_target *ti, struct dm_dev *dev,
+			   sector_t start, sector_t len, void *data)
+{
+	struct dm_pr *pr = data;
+	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+	if (!ops || !ops->pr_preempt) {
+		pr->ret = -EOPNOTSUPP;
+		return -1;
+	}
+
+	pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type,
+				  pr->abort);
+	if (!pr->ret)
+		return -1;
+
+	return 0;
 }
 
 static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
 			 enum pr_type type, bool abort)
 {
-	struct mapped_device *md = bdev->bd_disk->private_data;
-	const struct pr_ops *ops;
-	int r, srcu_idx;
+	struct dm_pr pr = {
+		.new_key	= new_key,
+		.old_key	= old_key,
+		.type		= type,
+		.fail_early	= false,
+	};
+	int ret;
 
-	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
-	if (r < 0)
-		goto out;
+	ret = dm_call_pr(bdev, __dm_pr_preempt, &pr);
+	if (ret)
+		return ret;
 
-	ops = bdev->bd_disk->fops->pr_ops;
-	if (ops && ops->pr_preempt)
-		r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
-	else
-		r = -EOPNOTSUPP;
-out:
-	dm_unprepare_ioctl(md, srcu_idx);
-	return r;
+	return pr.ret;
 }
 
 static int dm_pr_clear(struct block_device *bdev, u64 key)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index a8405ce305a9..5201df03ce40 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -53,7 +53,6 @@ struct dm_io;
  *---------------------------------------------------------------*/
 void dm_table_event_callback(struct dm_table *t,
 			     void (*fn)(void *), void *context);
-struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
 bool dm_table_has_no_data_devices(struct dm_table *table);
 int dm_calculate_queue_limits(struct dm_table *table,
@@ -218,9 +217,6 @@ void dm_kcopyd_exit(void);
 /*
  * Mempool operations
  */
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
-					    unsigned per_io_data_size, unsigned min_pool_size,
-					    bool integrity, bool poll);
 void dm_free_md_mempools(struct dm_md_mempools *pools);
 
 /*
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index d87f674ab762..bf6dffadbe6f 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -165,7 +165,7 @@ static int read_sb_page(struct mddev *mddev, loff_t offset,
 
 		if (sync_page_io(rdev, target,
 				 roundup(size, bdev_logical_block_size(rdev->bdev)),
-				 page, REQ_OP_READ, 0, true)) {
+				 page, REQ_OP_READ, true)) {
 			page->index = index;
 			return 0;
 		}
@@ -302,7 +302,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
 		atomic_inc(&bitmap->pending_writes);
 		set_buffer_locked(bh);
 		set_buffer_mapped(bh);
-		submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+		submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
 		bh = bh->b_this_page;
 	}
 
@@ -394,7 +394,7 @@ static int read_page(struct file *file, unsigned long index,
 			atomic_inc(&bitmap->pending_writes);
 			set_buffer_locked(bh);
 			set_buffer_mapped(bh);
-			submit_bh(REQ_OP_READ, 0, bh);
+			submit_bh(REQ_OP_READ, bh);
 		}
 		blk_cur++;
 		bh = bh->b_this_page;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8273ac5eef06..4df78e30b76a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -993,15 +993,15 @@ int md_super_wait(struct mddev *mddev)
 }
 
 int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
-		 struct page *page, int op, int op_flags, bool metadata_op)
+		 struct page *page, blk_opf_t opf, bool metadata_op)
 {
 	struct bio bio;
 	struct bio_vec bvec;
 
 	if (metadata_op && rdev->meta_bdev)
-		bio_init(&bio, rdev->meta_bdev, &bvec, 1, op | op_flags);
+		bio_init(&bio, rdev->meta_bdev, &bvec, 1, opf);
 	else
-		bio_init(&bio, rdev->bdev, &bvec, 1, op | op_flags);
+		bio_init(&bio, rdev->bdev, &bvec, 1, opf);
 
 	if (metadata_op)
 		bio.bi_iter.bi_sector = sector + rdev->sb_start;
@@ -1024,7 +1024,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 	if (rdev->sb_loaded)
 		return 0;
 
-	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
+	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true))
 		goto fail;
 	rdev->sb_loaded = 1;
 	return 0;
@@ -1722,7 +1722,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 			return -EINVAL;
 		bb_sector = (long long)offset;
 		if (!sync_page_io(rdev, bb_sector, sectors << 9,
-				  rdev->bb_page, REQ_OP_READ, 0, true))
+				  rdev->bb_page, REQ_OP_READ, true))
 			return -EIO;
 		bbp = (__le64 *)page_address(rdev->bb_page);
 		rdev->badblocks.shift = sb->bblog_shift;
@@ -2438,7 +2438,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 			mdname(mddev), mddev->max_disks);
 		return -EBUSY;
 	}
-	bdevname(rdev->bdev,b);
+	snprintf(b, sizeof(b), "%pg", rdev->bdev);
 	strreplace(b, '/', '!');
 
 	rdev->mddev = mddev;
@@ -4831,7 +4831,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 			flush_workqueue(md_misc_wq);
 			if (mddev->sync_thread) {
 				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-				md_reap_sync_thread(mddev, true);
+				md_reap_sync_thread(mddev);
 			}
 			mddev_unlock(mddev);
 		}
@@ -5579,7 +5579,7 @@ static void md_free(struct kobject *ko)
 
 	if (mddev->gendisk) {
 		del_gendisk(mddev->gendisk);
-		blk_cleanup_disk(mddev->gendisk);
+		put_disk(mddev->gendisk);
 	}
 	percpu_ref_exit(&mddev->writes_pending);
 
@@ -5718,7 +5718,7 @@ static int md_alloc(dev_t dev, char *name)
 out_del_gendisk:
 	del_gendisk(disk);
 out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_unlock_disks_mutex:
 	mutex_unlock(&disks_mutex);
 	mddev_put(mddev);
@@ -6197,7 +6197,7 @@ static void __md_stop_writes(struct mddev *mddev)
 	flush_workqueue(md_misc_wq);
 	if (mddev->sync_thread) {
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-		md_reap_sync_thread(mddev, true);
+		md_reap_sync_thread(mddev);
 	}
 
 	del_timer_sync(&mddev->safemode_timer);
@@ -9303,7 +9303,7 @@ void md_check_recovery(struct mddev *mddev)
 			 * ->spare_active and clear saved_raid_disk
 			 */
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			md_reap_sync_thread(mddev, true);
+			md_reap_sync_thread(mddev);
 			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
@@ -9338,7 +9338,7 @@ void md_check_recovery(struct mddev *mddev)
 			goto unlock;
 		}
 		if (mddev->sync_thread) {
-			md_reap_sync_thread(mddev, true);
+			md_reap_sync_thread(mddev);
 			goto unlock;
 		}
 		/* Set RUNNING before clearing NEEDED to avoid
@@ -9411,18 +9411,14 @@ void md_check_recovery(struct mddev *mddev)
 }
 EXPORT_SYMBOL(md_check_recovery);
 
-void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held)
+void md_reap_sync_thread(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
 	sector_t old_dev_sectors = mddev->dev_sectors;
 	bool is_reshaped = false;
 
-	if (reconfig_mutex_held)
-		mddev_unlock(mddev);
 	/* resync has finished, collect result */
 	md_unregister_thread(&mddev->sync_thread);
-	if (reconfig_mutex_held)
-		mddev_lock_nointr(mddev);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    mddev->degraded != mddev->raid_disks) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5f62c46ac2d3..b4f84b27bdef 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -719,7 +719,7 @@ extern struct md_thread *md_register_thread(
 extern void md_unregister_thread(struct md_thread **threadp);
 extern void md_wakeup_thread(struct md_thread *thread);
 extern void md_check_recovery(struct mddev *mddev);
-extern void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held);
+extern void md_reap_sync_thread(struct mddev *mddev);
 extern int mddev_init_writes_pending(struct mddev *mddev);
 extern bool md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_inc(struct mddev *mddev, struct bio *bi);
@@ -738,8 +738,7 @@ extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 			   sector_t sector, int size, struct page *page);
 extern int md_super_wait(struct mddev *mddev);
 extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
-			struct page *page, int op, int op_flags,
-			bool metadata_op);
+			struct page *page, blk_opf_t opf, bool metadata_op);
 extern void md_do_sync(struct md_thread *thread);
 extern void md_new_event(void);
 extern void md_allow_write(struct mddev *mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 258d4eb2d63c..05d8438cfec8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1220,8 +1220,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	struct raid1_info *mirror;
 	struct bio *read_bio;
 	struct bitmap *bitmap = mddev->bitmap;
-	const int op = bio_op(bio);
-	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	const enum req_op op = bio_op(bio);
+	const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
 	int max_sectors;
 	int rdisk;
 	bool r1bio_existed = !!r1_bio;
@@ -1240,7 +1240,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 		rcu_read_lock();
 		rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
 		if (rdev)
-			bdevname(rdev->bdev, b);
+			snprintf(b, sizeof(b), "%pg", rdev->bdev);
 		else
 			strcpy(b, "???");
 		rcu_read_unlock();
@@ -1988,9 +1988,9 @@ static void end_sync_write(struct bio *bio)
 }
 
 static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
-			    int sectors, struct page *page, int rw)
+			   int sectors, struct page *page, int rw)
 {
-	if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false))
+	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
 		/* success */
 		return 1;
 	if (rw == WRITE) {
@@ -2057,7 +2057,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 				rdev = conf->mirrors[d].rdev;
 				if (sync_page_io(rdev, sect, s<<9,
 						 pages[idx],
-						 REQ_OP_READ, 0, false)) {
+						 REQ_OP_READ, false)) {
 					success = 1;
 					break;
 				}
@@ -2305,7 +2305,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
 			if (sync_page_io(rdev, sect, s<<9,
-					 conf->tmppage, REQ_OP_READ, 0, false))
+					 conf->tmppage, REQ_OP_READ, false))
 				success = 1;
 			rdev_dec_pending(rdev, mddev);
 			if (success)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d589f823feb1..26545950ca42 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1136,8 +1136,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 {
 	struct r10conf *conf = mddev->private;
 	struct bio *read_bio;
-	const int op = bio_op(bio);
-	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	const enum req_op op = bio_op(bio);
+	const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
 	int max_sectors;
 	struct md_rdev *rdev;
 	char b[BDEVNAME_SIZE];
@@ -1164,7 +1164,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 		disk = r10_bio->devs[slot].devnum;
 		err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (err_rdev)
-			bdevname(err_rdev->bdev, b);
+			snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
 		else {
 			strcpy(b, "???");
 			/* This never gets dereferenced */
@@ -1230,9 +1230,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 				  struct bio *bio, bool replacement,
 				  int n_copy)
 {
-	const int op = bio_op(bio);
-	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
-	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
+	const enum req_op op = bio_op(bio);
+	const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
+	const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
 	unsigned long flags;
 	struct blk_plug_cb *cb;
 	struct raid1_plug_cb *plug = NULL;
@@ -2512,7 +2512,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
 				  addr,
 				  s << 9,
 				  pages[idx],
-				  REQ_OP_READ, 0, false);
+				  REQ_OP_READ, false);
 		if (ok) {
 			rdev = conf->mirrors[dw].rdev;
 			addr = r10_bio->devs[1].addr + sect;
@@ -2520,7 +2520,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
 					  addr,
 					  s << 9,
 					  pages[idx],
-					  REQ_OP_WRITE, 0, false);
+					  REQ_OP_WRITE, false);
 			if (!ok) {
 				set_bit(WriteErrorSeen, &rdev->flags);
 				if (!test_and_set_bit(WantReplacement,
@@ -2644,7 +2644,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
 	if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) &&
 	    (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
 		return -1;
-	if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false))
+	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
 		/* success */
 		return 1;
 	if (rw == WRITE) {
@@ -2726,7 +2726,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 					    sect,
 					    s<<9,
 					    conf->tmppage,
-					    REQ_OP_READ, 0, false);
+					    REQ_OP_READ, false);
 			rdev_dec_pending(rdev, mddev);
 			rcu_read_lock();
 			if (success)
@@ -5107,7 +5107,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
 				       addr,
 				       s << 9,
 				       pages[idx],
-				       REQ_OP_READ, 0, false);
+				       REQ_OP_READ, false);
 		rdev_dec_pending(rdev, mddev);
 		rcu_read_lock();
 		if (success)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 83c184eddbda..6f2dd73128b0 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1788,7 +1788,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
 	mb = page_address(page);
 	mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
 					     mb, PAGE_SIZE));
-	if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE,
+	if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE |
 			  REQ_SYNC | REQ_FUA, false)) {
 		__free_page(page);
 		return -EIO;
@@ -1898,7 +1898,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
 			sync_page_io(rdev, sh->sector, PAGE_SIZE,
-				     sh->dev[disk_index].page, REQ_OP_WRITE, 0,
+				     sh->dev[disk_index].page, REQ_OP_WRITE,
 				     false);
 			rdev_dec_pending(rdev, rdev->mddev);
 			rcu_read_lock();
@@ -1908,7 +1908,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
 			atomic_inc(&rrdev->nr_pending);
 			rcu_read_unlock();
 			sync_page_io(rrdev, sh->sector, PAGE_SIZE,
-				     sh->dev[disk_index].page, REQ_OP_WRITE, 0,
+				     sh->dev[disk_index].page, REQ_OP_WRITE,
 				     false);
 			rdev_dec_pending(rrdev, rrdev->mddev);
 			rcu_read_lock();
@@ -2394,7 +2394,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 						  PAGE_SIZE));
 			kunmap_atomic(addr);
 			sync_page_io(log->rdev, write_pos, PAGE_SIZE,
-				     dev->page, REQ_OP_WRITE, 0, false);
+				     dev->page, REQ_OP_WRITE, false);
 			write_pos = r5l_ring_add(log, write_pos,
 						 BLOCK_SECTORS);
 			offset += sizeof(__le32) +
@@ -2406,7 +2406,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
 						     mb, PAGE_SIZE));
 		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
-			     REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false);
+			     REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false);
 		sh->log_start = ctx->pos;
 		list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
 		atomic_inc(&log->stripe_in_journal_count);
@@ -2971,7 +2971,7 @@ static int r5l_load_log(struct r5l_log *log)
 	if (!page)
 		return -ENOMEM;
 
-	if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, 0, false)) {
+	if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, false)) {
 		ret = -EIO;
 		goto ioerr;
 	}
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 973e2e06f19c..98988cb26295 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -629,9 +629,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
 		if (bdev) {
 			struct bio *bio;
 
-			bio = bio_alloc_bioset(bdev, 0, GFP_NOIO,
+			bio = bio_alloc_bioset(bdev, 0,
 					       REQ_OP_WRITE | REQ_PREFLUSH,
-					       &ppl_conf->flush_bs);
+					       GFP_NOIO, &ppl_conf->flush_bs);
 			bio->bi_private = io;
 			bio->bi_end_io = ppl_flush_endio;
@@ -897,7 +897,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
 			 __func__, indent, "", rdev->bdev,
 			 (unsigned long long)sector);
 		if (!sync_page_io(rdev, sector, block_size, page2,
-				  REQ_OP_READ, 0, false)) {
+				  REQ_OP_READ, false)) {
 			md_error(mddev, rdev);
 			pr_debug("%s:%*s read failed!\n",
 				 __func__, indent, "");
@@ -919,7 +919,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
 			 (unsigned long long)(ppl_sector + i));
 		if (!sync_page_io(log->rdev,
 				  ppl_sector - log->rdev->data_offset + i,
-				  block_size, page2, REQ_OP_READ, 0,
+				  block_size, page2, REQ_OP_READ,
 				  false)) {
 			pr_debug("%s:%*s read failed!\n",
 				 __func__, indent, "");
@@ -946,7 +946,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
 			 (unsigned long long)parity_sector,
 			 parity_rdev->bdev);
 		if (!sync_page_io(parity_rdev, parity_sector, block_size,
-				  page1, REQ_OP_WRITE, 0, false)) {
+				  page1, REQ_OP_WRITE, false)) {
 			pr_debug("%s:%*s parity write error!\n",
 				 __func__, indent, "");
 			md_error(mddev, parity_rdev);
@@ -998,7 +998,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
 		int s = pp_size > PAGE_SIZE ? PAGE_SIZE : pp_size;
 
 		if (!sync_page_io(rdev, sector - rdev->data_offset,
-				  s, page, REQ_OP_READ, 0, false)) {
+				  s, page, REQ_OP_READ, false)) {
 			md_error(mddev, rdev);
 			ret = -EIO;
 			goto out;
@@ -1062,7 +1062,7 @@ static int ppl_write_empty_header(struct ppl_log *log)
 
 	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
 			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
-			  REQ_FUA, 0, false)) {
+			  REQ_FUA, false)) {
 		md_error(rdev->mddev, rdev);
 		ret = -EIO;
 	}
@@ -1100,7 +1100,7 @@ static int ppl_load_distributed(struct ppl_log *log)
 		if (!sync_page_io(rdev,
 				  rdev->ppl.sector - rdev->data_offset +
 				  pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
-				  0, false)) {
+				  false)) {
 			md_error(mddev, rdev);
 			ret = -EIO;
 			/* if not able to read - don't recover any PPL */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5d09256d7f81..5cabdbbac48b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1082,7 +1082,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 	should_defer = conf->batch_bio_dispatch && conf->group_cnt;
 
 	for (i = disks; i--; ) {
-		int op, op_flags = 0;
+		enum req_op op;
+		blk_opf_t op_flags = 0;
 		int replace_only = 0;
 		struct bio *bi, *rbi;
 		struct md_rdev *rdev, *rrdev = NULL;
@@ -7304,7 +7305,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 		goto abort;
 	conf->mddev = mddev;
 
-	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
+	ret = -ENOMEM;
+	conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!conf->stripe_hashtbl)
 		goto abort;
 
 	/* We init hash_locks[0] separately to that it can be used
@@ -7933,7 +7936,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 	int err = 0;
 	int number = rdev->raid_disk;
 	struct md_rdev __rcu **rdevp;
-	struct disk_info *p = conf->disks + number;
+	struct disk_info *p;
 	struct md_rdev *tmp;
 
 	print_raid5_conf(conf);
@@ -7952,6 +7955,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			log_exit(conf);
 		return 0;
 	}
+	if (unlikely(number >= conf->pool_size))
+		return 0;
+	p = conf->disks + number;
 	if (rdev == rcu_access_pointer(p->rdev))
 		rdevp = &p->rdev;
 	else if (rdev == rcu_access_pointer(p->replacement))
@@ -8062,6 +8068,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	 */
 	if (rdev->saved_raid_disk >= 0 &&
 	    rdev->saved_raid_disk >= first &&
+	    rdev->saved_raid_disk <= last &&
 	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
 		first = rdev->saved_raid_disk;
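Note: most hunks above are mechanical conversions to the block layer's combined blk_opf_t type, which folds the request operation (REQ_OP_*) and the request flags (REQ_SYNC, REQ_FUA, ...) into a single argument. The following is only an illustrative before/after sketch of the md call sites touched here; the variable names (rdev, sector, size, page, bh, bio) stand in for whatever the surrounding code already has and are not part of the patch.

	/* Old convention: operation and flags passed as separate arguments. */
	sync_page_io(rdev, sector, size, page, REQ_OP_READ, 0, true);
	submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);

	/* New convention: one blk_opf_t value carries REQ_OP_* plus REQ_* flags. */
	sync_page_io(rdev, sector, size, page, REQ_OP_READ, true);
	submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);

	/* The operation and flags can still be split back out of the combined value. */
	enum req_op op = bio_op(bio);			/* bio->bi_opf & REQ_OP_MASK */
	blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;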