Diffstat (limited to 'drivers/md')
30 files changed, 771 insertions, 213 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 3db222509e44..2557f198e175 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -436,6 +436,18 @@ config DM_DELAY If unsure, say N. +config DM_INIT + bool "DM \"dm-mod.create=\" parameter support" + depends on BLK_DEV_DM=y + ---help--- + Enable "dm-mod.create=" parameter to create mapped devices at init time. + This option is useful to allow mounting rootfs without requiring an + initramfs. + See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." + format. + + If unsure, say N. + config DM_UEVENT bool "DM uevents" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 822f4e8753bc..a52b703e588e 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -69,6 +69,10 @@ obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o obj-$(CONFIG_DM_ZONED) += dm-zoned.o obj-$(CONFIG_DM_WRITECACHE) += dm-writecache.o +ifeq ($(CONFIG_DM_INIT),y) +dm-mod-objs += dm-init.o +endif + ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o endif diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 23cb1dc7296b..64def336f053 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -432,8 +432,9 @@ static void do_btree_node_write(struct btree *b) int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); + struct bvec_iter_all iter_all; - bio_for_each_segment_all(bv, b->bio, j) + bio_for_each_segment_all(bv, b->bio, j, iter_all) memcpy(page_address(bv->bv_page), base + j * PAGE_SIZE, PAGE_SIZE); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 956004366699..886710043025 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -538,6 +538,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) { struct btree *b = container_of(bk, struct btree, keys); unsigned int i, stale; + char buf[80]; if (!KEY_PTRS(k) || bch_extent_invalid(bk, k)) @@ -547,19 +548,19 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) if (!ptr_available(b->c, k, i)) return true; - if (!expensive_debug_checks(b->c) && KEY_DIRTY(k)) - return false; - for (i = 0; i < KEY_PTRS(k); i++) { stale = ptr_stale(b->c, k, i); + if (stale && KEY_DIRTY(k)) { + bch_extent_to_text(buf, sizeof(buf), k); + pr_info("stale dirty pointer, stale %u, key: %s", + stale, buf); + } + btree_bug_on(stale > BUCKET_GC_GEN_MAX, b, "key too stale: %i, need_gc %u", stale, b->c->need_gc); - btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k), - b, "stale dirty pointer"); - if (stale) return true; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 15070412a32e..f101bfe8657a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -392,10 +392,11 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) /* * Flag for bypass if the IO is for read-ahead or background, - * unless the read-ahead request is for metadata (eg, for gfs2). + * unless the read-ahead request is for metadata + * (eg, for gfs2 or xfs). 
*/ if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && - !(bio->bi_opf & REQ_PRIO)) + !(bio->bi_opf & (REQ_META|REQ_PRIO))) goto skip; if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || @@ -877,7 +878,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, } if (!(bio->bi_opf & REQ_RAHEAD) && - !(bio->bi_opf & REQ_PRIO) && + !(bio->bi_opf & (REQ_META|REQ_PRIO)) && s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) reada = min_t(sector_t, dc->readahead >> 9, get_capacity(bio->bi_disk) - bio_end_sector(bio)); diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index 894410f3f829..ba1c93791d8d 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -111,7 +111,7 @@ void bch_cache_accounting_clear(struct cache_accounting *acc) { memset(&acc->total.cache_hits, 0, - sizeof(unsigned long) * 7); + sizeof(struct cache_stats)); } void bch_cache_accounting_destroy(struct cache_accounting *acc) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4dee119c3664..a697a3a923cd 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1615,21 +1615,21 @@ static void conditional_stop_bcache_device(struct cache_set *c, */ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", d->disk->disk_name); - /* - * There might be a small time gap that cache set is - * released but bcache device is not. Inside this time - * gap, regular I/O requests will directly go into - * backing device as no cache set attached to. This - * behavior may also introduce potential inconsistence - * data in writeback mode while cache is dirty. - * Therefore before calling bcache_device_stop() due - * to a broken cache device, dc->io_disable should be - * explicitly set to true. - */ - dc->io_disable = true; - /* make others know io_disable is true earlier */ - smp_mb(); - bcache_device_stop(d); + /* + * There might be a small time gap that cache set is + * released but bcache device is not. Inside this time + * gap, regular I/O requests will directly go into + * backing device as no cache set attached to. This + * behavior may also introduce potential inconsistence + * data in writeback mode while cache is dirty. + * Therefore before calling bcache_device_stop() due + * to a broken cache device, dc->io_disable should be + * explicitly set to true. 
+ */ + dc->io_disable = true; + /* make others know io_disable is true earlier */ + smp_mb(); + bcache_device_stop(d); } else { /* * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 557a8a3270a1..17bae9c14ca0 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -67,6 +67,8 @@ read_attribute(written); read_attribute(btree_written); read_attribute(metadata_written); read_attribute(active_journal_entries); +read_attribute(backing_dev_name); +read_attribute(backing_dev_uuid); sysfs_time_stats_attribute(btree_gc, sec, ms); sysfs_time_stats_attribute(btree_split, sec, us); @@ -243,6 +245,19 @@ SHOW(__bch_cached_dev) return strlen(buf); } + if (attr == &sysfs_backing_dev_name) { + snprintf(buf, BDEVNAME_SIZE + 1, "%s", dc->backing_dev_name); + strcat(buf, "\n"); + return strlen(buf); + } + + if (attr == &sysfs_backing_dev_uuid) { + /* convert binary uuid into 36-byte string plus '\0' */ + snprintf(buf, 36+1, "%pU", dc->sb.uuid); + strcat(buf, "\n"); + return strlen(buf); + } + #undef var return 0; } @@ -262,10 +277,10 @@ STORE(__cached_dev) sysfs_strtoul(data_csum, dc->disk.data_csum); d_strtoul(verify); - d_strtoul(bypass_torture_test); - d_strtoul(writeback_metadata); - d_strtoul(writeback_running); - d_strtoul(writeback_delay); + sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test); + sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata); + sysfs_strtoul_bool(writeback_running, dc->writeback_running); + sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX); sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, bch_cutoff_writeback); @@ -287,9 +302,15 @@ STORE(__cached_dev) sysfs_strtoul_clamp(writeback_rate_update_seconds, dc->writeback_rate_update_seconds, 1, WRITEBACK_RATE_UPDATE_SECS_MAX); - d_strtoul(writeback_rate_i_term_inverse); - d_strtoul_nonzero(writeback_rate_p_term_inverse); - d_strtoul_nonzero(writeback_rate_minimum); + sysfs_strtoul_clamp(writeback_rate_i_term_inverse, + dc->writeback_rate_i_term_inverse, + 1, UINT_MAX); + sysfs_strtoul_clamp(writeback_rate_p_term_inverse, + dc->writeback_rate_p_term_inverse, + 1, UINT_MAX); + sysfs_strtoul_clamp(writeback_rate_minimum, + dc->writeback_rate_minimum, + 1, UINT_MAX); sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX); @@ -299,7 +320,9 @@ STORE(__cached_dev) dc->io_disable = v ? 
1 : 0; } - d_strtoi_h(sequential_cutoff); + sysfs_strtoul_clamp(sequential_cutoff, + dc->sequential_cutoff, + 0, UINT_MAX); d_strtoi_h(readahead); if (attr == &sysfs_clear_stats) @@ -452,6 +475,8 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_verify, &sysfs_bypass_torture_test, #endif + &sysfs_backing_dev_name, + &sysfs_backing_dev_uuid, NULL }; KTYPE(bch_cached_dev); @@ -761,10 +786,12 @@ STORE(__bch_cache_set) c->shrink.scan_objects(&c->shrink, &sc); } - sysfs_strtoul(congested_read_threshold_us, - c->congested_read_threshold_us); - sysfs_strtoul(congested_write_threshold_us, - c->congested_write_threshold_us); + sysfs_strtoul_clamp(congested_read_threshold_us, + c->congested_read_threshold_us, + 0, UINT_MAX); + sysfs_strtoul_clamp(congested_write_threshold_us, + c->congested_write_threshold_us, + 0, UINT_MAX); if (attr == &sysfs_errors) { v = __sysfs_match_string(error_actions, -1, buf); @@ -774,12 +801,20 @@ STORE(__bch_cache_set) c->on_error = v; } - if (attr == &sysfs_io_error_limit) - c->error_limit = strtoul_or_return(buf); + sysfs_strtoul_clamp(io_error_limit, c->error_limit, 0, UINT_MAX); /* See count_io_errors() for why 88 */ - if (attr == &sysfs_io_error_halflife) - c->error_decay = strtoul_or_return(buf) / 88; + if (attr == &sysfs_io_error_halflife) { + unsigned long v = 0; + ssize_t ret; + + ret = strtoul_safe_clamp(buf, v, 0, UINT_MAX); + if (!ret) { + c->error_decay = v / 88; + return size; + } + return ret; + } if (attr == &sysfs_io_disable) { v = strtoul_or_return(buf); @@ -794,13 +829,15 @@ STORE(__bch_cache_set) } } - sysfs_strtoul(journal_delay_ms, c->journal_delay_ms); - sysfs_strtoul(verify, c->verify); - sysfs_strtoul(key_merging_disabled, c->key_merging_disabled); + sysfs_strtoul_clamp(journal_delay_ms, + c->journal_delay_ms, + 0, USHRT_MAX); + sysfs_strtoul_bool(verify, c->verify); + sysfs_strtoul_bool(key_merging_disabled, c->key_merging_disabled); sysfs_strtoul(expensive_debug_checks, c->expensive_debug_checks); - sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite); - sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled); - sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled); + sysfs_strtoul_bool(gc_always_rewrite, c->gc_always_rewrite); + sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled); + sysfs_strtoul_bool(copy_gc_enabled, c->copy_gc_enabled); /* * write gc_after_writeback here may overwrite an already set * BCH_DO_AUTO_GC, it doesn't matter because this flag will be diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index 3fe82425859c..215df32f567b 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -79,11 +79,28 @@ do { \ return strtoul_safe(buf, var) ?: (ssize_t) size; \ } while (0) +#define sysfs_strtoul_bool(file, var) \ +do { \ + if (attr == &sysfs_ ## file) { \ + unsigned long v = strtoul_or_return(buf); \ + \ + var = v ? 
1 : 0; \ + return size; \ + } \ +} while (0) + #define sysfs_strtoul_clamp(file, var, min, max) \ do { \ - if (attr == &sysfs_ ## file) \ - return strtoul_safe_clamp(buf, var, min, max) \ - ?: (ssize_t) size; \ + if (attr == &sysfs_ ## file) { \ + unsigned long v = 0; \ + ssize_t ret; \ + ret = strtoul_safe_clamp(buf, v, min, max); \ + if (!ret) { \ + var = v; \ + return size; \ + } \ + return ret; \ + } \ } while (0) #define strtoul_or_return(cp) \ diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 20eddeac1531..62fb917f7a4f 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -270,7 +270,11 @@ int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) int i; struct bio_vec *bv; - bio_for_each_segment_all(bv, bio, i) { + /* + * This is called on freshly new bio, so it is safe to access the + * bvec table directly. + */ + for (i = 0, bv = bio->bi_io_vec; i < bio->bi_vcnt; bv++, i++) { bv->bv_page = alloc_page(gfp_mask); if (!bv->bv_page) { while (--bv >= bio->bi_io_vec) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 6a743d3bb338..4e4c6810dc3c 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -71,6 +71,9 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, in_use > bch_cutoff_writeback_sync) return false; + if (bio_op(bio) == REQ_OP_DISCARD) + return false; + if (dc->partial_stripes_expensive && bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector, bio_sectors(bio))) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b29a8327eed1..d249cf8ac277 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -353,6 +353,7 @@ struct cache_features { enum cache_metadata_mode mode; enum cache_io_mode io_mode; unsigned metadata_version; + bool discard_passdown:1; }; struct cache_stats { @@ -1899,7 +1900,11 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio) b = to_dblock(from_dblock(b) + 1); } - bio_endio(bio); + if (cache->features.discard_passdown) { + remap_to_origin(cache, bio); + generic_make_request(bio); + } else + bio_endio(bio); return false; } @@ -2233,13 +2238,14 @@ static void init_features(struct cache_features *cf) cf->mode = CM_WRITE; cf->io_mode = CM_IO_WRITEBACK; cf->metadata_version = 1; + cf->discard_passdown = true; } static int parse_features(struct cache_args *ca, struct dm_arg_set *as, char **error) { static const struct dm_arg _args[] = { - {0, 2, "Invalid number of cache feature arguments"}, + {0, 3, "Invalid number of cache feature arguments"}, }; int r, mode_ctr = 0; @@ -2274,6 +2280,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, else if (!strcasecmp(arg, "metadata2")) cf->metadata_version = 2; + else if (!strcasecmp(arg, "no_discard_passdown")) + cf->discard_passdown = false; + else { *error = "Unrecognised cache feature requested"; return -EINVAL; @@ -2496,7 +2505,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->num_discard_bios = 1; ti->discards_supported = true; - ti->split_discard_bios = false; ti->per_io_data_size = sizeof(struct per_bio_data); @@ -3120,6 +3128,39 @@ static void cache_resume(struct dm_target *ti) do_waker(&cache->waker.work); } +static void emit_flags(struct cache *cache, char *result, + unsigned maxlen, ssize_t *sz_ptr) +{ + ssize_t sz = *sz_ptr; + struct cache_features *cf = &cache->features; + unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1; + + DMEMIT("%u ", count); + + 
if (cf->metadata_version == 2) + DMEMIT("metadata2 "); + + if (writethrough_mode(cache)) + DMEMIT("writethrough "); + + else if (passthrough_mode(cache)) + DMEMIT("passthrough "); + + else if (writeback_mode(cache)) + DMEMIT("writeback "); + + else { + DMEMIT("unknown "); + DMERR("%s: internal error: unknown io mode: %d", + cache_device_name(cache), (int) cf->io_mode); + } + + if (!cf->discard_passdown) + DMEMIT("no_discard_passdown "); + + *sz_ptr = sz; +} + /* * Status format: * @@ -3186,25 +3227,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, (unsigned) atomic_read(&cache->stats.promotion), (unsigned long) atomic_read(&cache->nr_dirty)); - if (cache->features.metadata_version == 2) - DMEMIT("2 metadata2 "); - else - DMEMIT("1 "); - - if (writethrough_mode(cache)) - DMEMIT("writethrough "); - - else if (passthrough_mode(cache)) - DMEMIT("passthrough "); - - else if (writeback_mode(cache)) - DMEMIT("writeback "); - - else { - DMERR("%s: internal error: unknown io mode: %d", - cache_device_name(cache), (int) cache->features.io_mode); - goto err; - } + emit_flags(cache, result, maxlen, &sz); DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); @@ -3433,14 +3456,62 @@ static int cache_iterate_devices(struct dm_target *ti, return r; } +static bool origin_dev_supports_discard(struct block_device *origin_bdev) +{ + struct request_queue *q = bdev_get_queue(origin_bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the origin device + * supports discards. Disable discard_passdown if not. + */ +static void disable_passdown_if_not_supported(struct cache *cache) +{ + struct block_device *origin_bdev = cache->origin_dev->bdev; + struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + const char *reason = NULL; + char buf[BDEVNAME_SIZE]; + + if (!cache->features.discard_passdown) + return; + + if (!origin_dev_supports_discard(origin_bdev)) + reason = "discard unsupported"; + + else if (origin_limits->max_discard_sectors < cache->sectors_per_block) + reason = "max discard sectors smaller than a block"; + + if (reason) { + DMWARN("Origin device (%s) %s: Disabling discard passdown.", + bdevname(origin_bdev, buf), reason); + cache->features.discard_passdown = false; + } +} + static void set_discard_limits(struct cache *cache, struct queue_limits *limits) { + struct block_device *origin_bdev = cache->origin_dev->bdev; + struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + + if (!cache->features.discard_passdown) { + /* No passdown is done so setting own virtual limits */ + limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, + cache->origin_sectors); + limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; + return; + } + /* - * FIXME: these limits may be incompatible with the cache device + * cache_iterate_devices() is stacking both origin and fast device limits + * but discards aren't passed to fast device, so inherit origin's limits. 
*/ - limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, - cache->origin_sectors); - limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; + limits->max_discard_sectors = origin_limits->max_discard_sectors; + limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors; + limits->discard_granularity = origin_limits->discard_granularity; + limits->discard_alignment = origin_limits->discard_alignment; + limits->discard_misaligned = origin_limits->discard_misaligned; } static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -3457,6 +3528,8 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); } + + disable_passdown_if_not_supported(cache); set_discard_limits(cache, limits); } @@ -3464,7 +3537,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {2, 0, 0}, + .version = {2, 1, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index dd538e6b2748..dd6565798778 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1447,8 +1447,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) { unsigned int i; struct bio_vec *bv; + struct bvec_iter_all iter_all; - bio_for_each_segment_all(bv, clone, i) { + bio_for_each_segment_all(bv, clone, i, iter_all) { BUG_ON(!bv->bv_page); mempool_free(bv->bv_page, &cc->page_pool); } diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c new file mode 100644 index 000000000000..b53f30f16b4d --- /dev/null +++ b/drivers/md/dm-init.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * dm-init.c + * Copyright (C) 2017 The Chromium OS Authors <chromium-os-dev@chromium.org> + * + * This file is released under the GPLv2. + */ + +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/device-mapper.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/moduleparam.h> + +#define DM_MSG_PREFIX "init" +#define DM_MAX_DEVICES 256 +#define DM_MAX_TARGETS 256 +#define DM_MAX_STR_SIZE 4096 + +static char *create; + +/* + * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+] + * Table format: <start_sector> <num_sectors> <target_type> <target_args> + * + * See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." format + * details. 
+ */ + +struct dm_device { + struct dm_ioctl dmi; + struct dm_target_spec *table[DM_MAX_TARGETS]; + char *target_args_array[DM_MAX_TARGETS]; + struct list_head list; +}; + +const char *dm_allowed_targets[] __initconst = { + "crypt", + "delay", + "linear", + "snapshot-origin", + "striped", + "verity", +}; + +static int __init dm_verify_target_type(const char *target) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dm_allowed_targets); i++) { + if (!strcmp(dm_allowed_targets[i], target)) + return 0; + } + return -EINVAL; +} + +static void __init dm_setup_cleanup(struct list_head *devices) +{ + struct dm_device *dev, *tmp; + unsigned int i; + + list_for_each_entry_safe(dev, tmp, devices, list) { + list_del(&dev->list); + for (i = 0; i < dev->dmi.target_count; i++) { + kfree(dev->table[i]); + kfree(dev->target_args_array[i]); + } + kfree(dev); + } +} + +/** + * str_field_delimit - delimit a string based on a separator char. + * @str: the pointer to the string to delimit. + * @separator: char that delimits the field + * + * Find a @separator and replace it by '\0'. + * Remove leading and trailing spaces. + * Return the remainder string after the @separator. + */ +static char __init *str_field_delimit(char **str, char separator) +{ + char *s; + + /* TODO: add support for escaped characters */ + *str = skip_spaces(*str); + s = strchr(*str, separator); + /* Delimit the field and remove trailing spaces */ + if (s) + *s = '\0'; + *str = strim(*str); + return s ? ++s : NULL; +} + +/** + * dm_parse_table_entry - parse a table entry + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * <start_sector> <num_sectors> <target_type> <target_args>[, ...] + * + * Return the remainder string after the table entry, i.e, after the comma which + * delimits the entry or NULL if reached the end of the string. + */ +static char __init *dm_parse_table_entry(struct dm_device *dev, char *str) +{ + const unsigned int n = dev->dmi.target_count - 1; + struct dm_target_spec *sp; + unsigned int i; + /* fields: */ + char *field[4]; + char *next; + + field[0] = str; + /* Delimit first 3 fields that are separated by space */ + for (i = 0; i < ARRAY_SIZE(field) - 1; i++) { + field[i + 1] = str_field_delimit(&field[i], ' '); + if (!field[i + 1]) + return ERR_PTR(-EINVAL); + } + /* Delimit last field that can be terminated by comma */ + next = str_field_delimit(&field[i], ','); + + sp = kzalloc(sizeof(*sp), GFP_KERNEL); + if (!sp) + return ERR_PTR(-ENOMEM); + dev->table[n] = sp; + + /* start_sector */ + if (kstrtoull(field[0], 0, &sp->sector_start)) + return ERR_PTR(-EINVAL); + /* num_sector */ + if (kstrtoull(field[1], 0, &sp->length)) + return ERR_PTR(-EINVAL); + /* target_type */ + strscpy(sp->target_type, field[2], sizeof(sp->target_type)); + if (dm_verify_target_type(sp->target_type)) { + DMERR("invalid type \"%s\"", sp->target_type); + return ERR_PTR(-EINVAL); + } + /* target_args */ + dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL, + DM_MAX_STR_SIZE); + if (!dev->target_args_array[n]) + return ERR_PTR(-ENOMEM); + + return next; +} + +/** + * dm_parse_table - parse "dm-mod.create=" table field + * @dev: device to store the parsed information. 
+ * @str: the pointer to a string with the format: + * <table>[,<table>+] + */ +static int __init dm_parse_table(struct dm_device *dev, char *str) +{ + char *table_entry = str; + + while (table_entry) { + DMDEBUG("parsing table \"%s\"", str); + if (++dev->dmi.target_count >= DM_MAX_TARGETS) { + DMERR("too many targets %u > %d", + dev->dmi.target_count, DM_MAX_TARGETS); + return -EINVAL; + } + table_entry = dm_parse_table_entry(dev, table_entry); + if (IS_ERR(table_entry)) { + DMERR("couldn't parse table"); + return PTR_ERR(table_entry); + } + } + + return 0; +} + +/** + * dm_parse_device_entry - parse a device entry + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * name,uuid,minor,flags,table[; ...] + * + * Return the remainder string after the table entry, i.e, after the semi-colon + * which delimits the entry or NULL if reached the end of the string. + */ +static char __init *dm_parse_device_entry(struct dm_device *dev, char *str) +{ + /* There are 5 fields: name,uuid,minor,flags,table; */ + char *field[5]; + unsigned int i; + char *next; + + field[0] = str; + /* Delimit first 4 fields that are separated by comma */ + for (i = 0; i < ARRAY_SIZE(field) - 1; i++) { + field[i+1] = str_field_delimit(&field[i], ','); + if (!field[i+1]) + return ERR_PTR(-EINVAL); + } + /* Delimit last field that can be delimited by semi-colon */ + next = str_field_delimit(&field[i], ';'); + + /* name */ + strscpy(dev->dmi.name, field[0], sizeof(dev->dmi.name)); + /* uuid */ + strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid)); + /* minor */ + if (strlen(field[2])) { + if (kstrtoull(field[2], 0, &dev->dmi.dev)) + return ERR_PTR(-EINVAL); + dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG; + } + /* flags */ + if (!strcmp(field[3], "ro")) + dev->dmi.flags |= DM_READONLY_FLAG; + else if (strcmp(field[3], "rw")) + return ERR_PTR(-EINVAL); + /* table */ + if (dm_parse_table(dev, field[4])) + return ERR_PTR(-EINVAL); + + return next; +} + +/** + * dm_parse_devices - parse "dm-mod.create=" argument + * @devices: list of struct dm_device to store the parsed information. + * @str: the pointer to a string with the format: + * <device>[;<device>+] + */ +static int __init dm_parse_devices(struct list_head *devices, char *str) +{ + unsigned long ndev = 0; + struct dm_device *dev; + char *device = str; + + DMDEBUG("parsing \"%s\"", str); + while (device) { + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + list_add_tail(&dev->list, devices); + + if (++ndev >= DM_MAX_DEVICES) { + DMERR("too many targets %u > %d", + dev->dmi.target_count, DM_MAX_TARGETS); + return -EINVAL; + } + + device = dm_parse_device_entry(dev, device); + if (IS_ERR(device)) { + DMERR("couldn't parse device"); + return PTR_ERR(device); + } + } + + return 0; +} + +/** + * dm_init_init - parse "dm-mod.create=" argument and configure drivers + */ +static int __init dm_init_init(void) +{ + struct dm_device *dev; + LIST_HEAD(devices); + char *str; + int r; + + if (!create) + return 0; + + if (strlen(create) >= DM_MAX_STR_SIZE) { + DMERR("Argument is too big. 
Limit is %d\n", DM_MAX_STR_SIZE); + return -EINVAL; + } + str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE); + if (!str) + return -ENOMEM; + + r = dm_parse_devices(&devices, str); + if (r) + goto out; + + DMINFO("waiting for all devices to be available before creating mapped devices\n"); + wait_for_device_probe(); + + list_for_each_entry(dev, &devices, list) { + if (dm_early_create(&dev->dmi, dev->table, + dev->target_args_array)) + break; + } +out: + kfree(str); + dm_setup_cleanup(&devices); + return r; +} + +late_initcall(dm_init_init); + +module_param(create, charp, 0); +MODULE_PARM_DESC(create, "Create a mapped device in early boot"); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 457200ca6287..d57d997a52c8 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1122,7 +1122,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se return r; data = dm_bufio_read(ic->bufio, *metadata_block, &b); - if (unlikely(IS_ERR(data))) + if (IS_ERR(data)) return PTR_ERR(data); to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); @@ -1368,8 +1368,8 @@ again: checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - DMERR("Checksum failed at sector 0x%llx", - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); + DMERR_LIMIT("Checksum failed at sector 0x%llx", + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); r = -EILSEQ; atomic64_inc(&ic->number_of_mismatches); } @@ -1561,8 +1561,8 @@ retry_kmap: integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { - DMERR("Checksum failed when reading from journal, at sector 0x%llx", - (unsigned long long)logical_sector); + DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", + (unsigned long long)logical_sector); } } #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index f666778ad237..c740153b4e52 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -2018,3 +2018,106 @@ out: return r; } + + +/** + * dm_early_create - create a mapped device in early boot. + * + * @dmi: Contains main information of the device mapping to be created. + * @spec_array: array of pointers to struct dm_target_spec. Describes the + * mapping table of the device. + * @target_params_array: array of strings with the parameters to a specific + * target. + * + * Instead of having the struct dm_target_spec and the parameters for every + * target embedded at the end of struct dm_ioctl (as performed in a normal + * ioctl), pass them as arguments, so the caller doesn't need to serialize them. + * The size of the spec_array and target_params_array is given by + * @dmi->target_count. + * This function is supposed to be called in early boot, so locking mechanisms + * to protect against concurrent loads are not required. 
+ */ +int __init dm_early_create(struct dm_ioctl *dmi, + struct dm_target_spec **spec_array, + char **target_params_array) +{ + int r, m = DM_ANY_MINOR; + struct dm_table *t, *old_map; + struct mapped_device *md; + unsigned int i; + + if (!dmi->target_count) + return -EINVAL; + + r = check_name(dmi->name); + if (r) + return r; + + if (dmi->flags & DM_PERSISTENT_DEV_FLAG) + m = MINOR(huge_decode_dev(dmi->dev)); + + /* alloc dm device */ + r = dm_create(m, &md); + if (r) + return r; + + /* hash insert */ + r = dm_hash_insert(dmi->name, *dmi->uuid ? dmi->uuid : NULL, md); + if (r) + goto err_destroy_dm; + + /* alloc table */ + r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md); + if (r) + goto err_destroy_dm; + + /* add targets */ + for (i = 0; i < dmi->target_count; i++) { + r = dm_table_add_target(t, spec_array[i]->target_type, + (sector_t) spec_array[i]->sector_start, + (sector_t) spec_array[i]->length, + target_params_array[i]); + if (r) { + DMWARN("error adding target to table"); + goto err_destroy_table; + } + } + + /* finish table */ + r = dm_table_complete(t); + if (r) + goto err_destroy_table; + + md->type = dm_table_get_type(t); + /* setup md->queue to reflect md's type (may block) */ + r = dm_setup_md_queue(md, t); + if (r) { + DMWARN("unable to set up device queue for new table."); + goto err_destroy_table; + } + + /* Set new map */ + dm_suspend(md, 0); + old_map = dm_swap_table(md, t); + if (IS_ERR(old_map)) { + r = PTR_ERR(old_map); + goto err_destroy_table; + } + set_disk_ro(dm_disk(md), !!(dmi->flags & DM_READONLY_FLAG)); + + /* resume device */ + r = dm_resume(md); + if (r) + goto err_destroy_table; + + DMINFO("%s (%s) is ready", md->disk->disk_name, dmi->name); + dm_put(md); + return 0; + +err_destroy_table: + dm_table_destroy(t); +err_destroy_dm: + dm_put(md); + dm_destroy(md); + return r; +} diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index adcfe8ae10aa..9fdef6897316 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs) } } - /* - * RAID1 and RAID10 personalities require bio splitting, - * RAID0/4/5/6 don't and process large discard bios properly. - */ - ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs)); ti->num_discard_bios = 1; } @@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, chunk_size); blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs)); + + /* + * RAID1 and RAID10 personalities require bio splitting, + * RAID0/4/5/6 don't and process large discard bios properly. + */ + if (rs_is_raid1(rs) || rs_is_raid10(rs)) { + limits->discard_granularity = chunk_size; + limits->max_discard_sectors = chunk_size; + } } static void raid_postsuspend(struct dm_target *ti) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index a20531e5f3b4..09773636602d 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -12,6 +12,22 @@ #define DM_MSG_PREFIX "core-rq" +/* + * One of these is allocated per request. 
+ */ +struct dm_rq_target_io { + struct mapped_device *md; + struct dm_target *ti; + struct request *orig, *clone; + struct kthread_work work; + blk_status_t error; + union map_info info; + struct dm_stats_aux stats_aux; + unsigned long duration_jiffies; + unsigned n_sectors; + unsigned completed; +}; + #define DM_MQ_NR_HW_QUEUES 1 #define DM_MQ_QUEUE_DEPTH 2048 static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES; @@ -527,7 +543,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) md->tag_set->ops = &dm_mq_ops; md->tag_set->queue_depth = dm_get_blk_mq_queue_depth(); md->tag_set->numa_node = md->numa_node_id; - md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE; md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues(); md->tag_set->driver_data = md; diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index b39245545229..1eea0da641db 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -17,22 +17,6 @@ struct mapped_device; /* - * One of these is allocated per request. - */ -struct dm_rq_target_io { - struct mapped_device *md; - struct dm_target *ti; - struct request *orig, *clone; - struct kthread_work work; - blk_status_t error; - union map_info info; - struct dm_stats_aux stats_aux; - unsigned long duration_jiffies; - unsigned n_sectors; - unsigned completed; -}; - -/* * For request-based dm - the bio clones we allocate are embedded in these * structs. * diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 36805b12661e..a168963b757d 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2338,13 +2338,6 @@ static int origin_map(struct dm_target *ti, struct bio *bio) return do_origin(o->dev, bio); } -static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) -{ - DMWARN("device does not support dax."); - return -EIO; -} - /* * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. 
@@ -2404,7 +2397,6 @@ static struct target_type origin_target = { .postsuspend = origin_postsuspend, .status = origin_status, .iterate_devices = origin_iterate_devices, - .direct_access = origin_dax_direct_access, }; static struct target_type snapshot_target = { diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index fae35caf3672..8a0f057b8122 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -61,8 +61,7 @@ static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_pat { struct switch_ctx *sctx; - sctx = kzalloc(sizeof(struct switch_ctx) + nr_paths * sizeof(struct switch_path), - GFP_KERNEL); + sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL); if (!sctx) return NULL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4b1be754cc41..ba9481f1bf3c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1698,14 +1698,6 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, return q && !blk_queue_add_random(q); } -static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); -} - static bool dm_table_all_devices_attribute(struct dm_table *t, iterate_devices_callout_fn func) { @@ -1902,11 +1894,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_supports_write_zeroes(t)) q->limits.max_write_zeroes_sectors = 0; - if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) - blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q); - else - blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q); - dm_table_verify_integrity(t); /* diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e83b63608262..fcd887703f95 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3283,6 +3283,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) as.argc = argc; as.argv = argv; + /* make sure metadata and data are different devices */ + if (!strcmp(argv[0], argv[1])) { + ti->error = "Error setting metadata or data device"; + r = -EINVAL; + goto out_unlock; + } + /* * Set default pool features. 
*/ @@ -4167,6 +4174,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) tc->sort_bio_list = RB_ROOT; if (argc == 3) { + if (!strcmp(argv[0], argv[2])) { + ti->error = "Error setting origin device"; + r = -EINVAL; + goto bad_origin_dev; + } + r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); if (r) { ti->error = "Error opening origin device"; @@ -4227,7 +4240,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; ti->num_discard_bios = 1; - ti->split_discard_bios = false; } mutex_unlock(&dm_thin_pool_table.mutex); diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 0ce04e5b4afb..b634fa23f4c4 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -73,7 +73,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, *offset = (unsigned)(position - (block << v->data_dev_block_bits)); res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); - if (unlikely(IS_ERR(res))) { + if (IS_ERR(res)) { DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", v->data_dev->name, (unsigned long long)rsb, (unsigned long long)(v->fec->start + block), @@ -163,7 +163,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); - if (unlikely(IS_ERR(par))) + if (IS_ERR(par)) return PTR_ERR(par); } } @@ -253,7 +253,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, } bbuf = dm_bufio_read(bufio, block, &buf); - if (unlikely(IS_ERR(bbuf))) { + if (IS_ERR(bbuf)) { DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld", v->data_dev->name, (unsigned long long)rsb, diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 2b8cee35e4d5..f7822875589e 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1859,7 +1859,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - wc->writeback_wq = alloc_workqueue("writecache-writeabck", WQ_MEM_RECLAIM, 1); + wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1); if (!wc->writeback_wq) { r = -ENOMEM; ti->error = "Could not allocate writeback workqueue"; diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 6af5babe6837..8865c1709e16 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->per_io_data_size = sizeof(struct dmz_bioctx); ti->flush_supported = true; ti->discards_supported = true; - ti->split_discard_bios = true; /* The exposed capacity is the number of chunks that can be mapped */ ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 515e6af9bed2..68d24056d0b1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -158,9 +158,6 @@ struct table_device { struct dm_dev dm_dev; }; -static struct kmem_cache *_rq_tio_cache; -static struct kmem_cache *_rq_cache; - /* * Bio-based DM's mempools' reserved IOs set by the user. 
*/ @@ -222,20 +219,11 @@ static unsigned dm_get_numa_node(void) static int __init local_init(void) { - int r = -ENOMEM; - - _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); - if (!_rq_tio_cache) - return r; - - _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request), - __alignof__(struct request), 0, NULL); - if (!_rq_cache) - goto out_free_rq_tio_cache; + int r; r = dm_uevent_init(); if (r) - goto out_free_rq_cache; + return r; deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); if (!deferred_remove_workqueue) { @@ -257,10 +245,6 @@ out_free_workqueue: destroy_workqueue(deferred_remove_workqueue); out_uevent_exit: dm_uevent_exit(); -out_free_rq_cache: - kmem_cache_destroy(_rq_cache); -out_free_rq_tio_cache: - kmem_cache_destroy(_rq_tio_cache); return r; } @@ -270,8 +254,6 @@ static void local_exit(void) flush_scheduled_work(); destroy_workqueue(deferred_remove_workqueue); - kmem_cache_destroy(_rq_cache); - kmem_cache_destroy(_rq_tio_cache); unregister_blkdev(_major, _name); dm_uevent_exit(); @@ -1478,17 +1460,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti) return ti->num_write_zeroes_bios; } -typedef bool (*is_split_required_fn)(struct dm_target *ti); - -static bool is_split_required_for_discard(struct dm_target *ti) -{ - return ti->split_discard_bios; -} - static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios, bool is_split_required) + unsigned num_bios) { - unsigned len; + unsigned len = ci->sector_count; /* * Even though the device advertised support for this type of @@ -1499,11 +1474,6 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * if (!num_bios) return -EOPNOTSUPP; - if (!is_split_required) - len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); - else - len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); - __send_duplicate_bios(ci, ti, num_bios, &len); ci->sector += len; @@ -1514,23 +1484,38 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * static int __send_discard(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti), - is_split_required_for_discard(ti)); + return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti)); } static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti)); } static int __send_write_same(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti)); } static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti)); +} + +static bool is_abnormal_io(struct bio *bio) +{ + bool r = false; + + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE_ZEROES: + r = true; + break; + } + + return r; } static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, @@ -1565,7 +1550,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) if (!dm_target_is_valid(ti)) return -EIO; - if 
(unlikely(__process_abnormal_io(ci, ti, &r))) + if (__process_abnormal_io(ci, ti, &r)) return r; len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); @@ -1601,13 +1586,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, blk_qc_t ret = BLK_QC_T_NONE; int error = 0; - if (unlikely(!map)) { - bio_io_error(bio); - return ret; - } - - blk_queue_split(md->queue, &bio); - init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { @@ -1675,18 +1653,13 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * Optimized variant of __split_and_process_bio that leverages the * fact that targets that use it do _not_ have a need to split bios. */ -static blk_qc_t __process_bio(struct mapped_device *md, - struct dm_table *map, struct bio *bio) +static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map, + struct bio *bio, struct dm_target *ti) { struct clone_info ci; blk_qc_t ret = BLK_QC_T_NONE; int error = 0; - if (unlikely(!map)) { - bio_io_error(bio); - return ret; - } - init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { @@ -1704,21 +1677,11 @@ static blk_qc_t __process_bio(struct mapped_device *md, error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ } else { - struct dm_target *ti = md->immutable_target; struct dm_target_io *tio; - /* - * Defend against IO still getting in during teardown - * - as was seen for a time with nvme-fcloop - */ - if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) { - error = -EIO; - goto out; - } - ci.bio = bio; ci.sector_count = bio_sectors(bio); - if (unlikely(__process_abnormal_io(&ci, ti, &error))) + if (__process_abnormal_io(&ci, ti, &error)) goto out; tio = alloc_tio(&ci, ti, 0, GFP_NOIO); @@ -1730,11 +1693,55 @@ out: return ret; } +static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio) +{ + unsigned len, sector_count; + + sector_count = bio_sectors(*bio); + len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count); + + if (sector_count > len) { + struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split); + + bio_chain(split, *bio); + trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector); + generic_make_request(*bio); + *bio = split; + } +} + static blk_qc_t dm_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { + blk_qc_t ret = BLK_QC_T_NONE; + struct dm_target *ti = md->immutable_target; + + if (unlikely(!map)) { + bio_io_error(bio); + return ret; + } + + if (!ti) { + ti = dm_table_find_target(map, bio->bi_iter.bi_sector); + if (unlikely(!ti || !dm_target_is_valid(ti))) { + bio_io_error(bio); + return ret; + } + } + + /* + * If in ->make_request_fn we need to use blk_queue_split(), otherwise + * queue_limits for abnormal requests (e.g. discard, writesame, etc) + * won't be imposed. 
+ */ + if (current->bio_list) { + blk_queue_split(md->queue, &bio); + if (!is_abnormal_io(bio)) + dm_queue_split(md, ti, &bio); + } + if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) - return __process_bio(md, map, bio); + return __process_bio(md, map, bio, ti); else return __split_and_process_bio(md, map, bio); } diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index d45c697c0ebe..5998d78aa189 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -96,8 +96,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) int i, cnt; bool discard_supported = false; - conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info), - GFP_KERNEL); + conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL); if (!conf) return NULL; diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 492a3f8ac119..3972232b8037 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, int r; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, int r; p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); if (unlikely(!p)) return -EWOULDBLOCK; @@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); memset(p, 0, dm_bm_block_size(bm)); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fa47249fa3e4..fdf451aac369 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1603,11 +1603,9 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev) return; } set_bit(Blocked, &rdev->flags); - if (test_and_clear_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) mddev->degraded++; - set_bit(Faulty, &rdev->flags); - } else - set_bit(Faulty, &rdev->flags); + set_bit(Faulty, &rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery is running, make sure it aborts. @@ -2120,13 +2118,14 @@ static void process_checks(struct r1bio *r1_bio) struct page **spages = get_resync_pages(sbio)->pages; struct bio_vec *bi; int page_len[RESYNC_PAGES] = { 0 }; + struct bvec_iter_all iter_all; if (sbio->bi_end_io != end_sync_read) continue; /* Now we can 'fixup' the error value */ sbio->bi_status = 0; - bio_for_each_segment_all(bi, sbio, j) + bio_for_each_segment_all(bi, sbio, j, iter_all) page_len[j] = bi->bv_len; if (!status) { |