Diffstat (limited to 'drivers/md')
57 files changed, 456 insertions, 492 deletions
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index b2d10063d35f..d4697e79d5a3 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -5,6 +5,7 @@ config BCACHE
 	select BLOCK_HOLDER_DEPRECATED if SYSFS
 	select CRC64
 	select CLOSURES
+	select MIN_HEAP
 	help
 	Allows a block device to be used as cache for other devices; uses
 	a btree for indexing and the layout is optimized for SSDs.
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index da50f6661bae..8998e61efa40 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -189,23 +189,16 @@ static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args)
 	return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
 }
 
-static inline void new_bucket_swap(void *l, void *r, void __always_unused *args)
-{
-	struct bucket **lhs = l, **rhs = r;
-
-	swap(*lhs, *rhs);
-}
-
 static void invalidate_buckets_lru(struct cache *ca)
 {
 	struct bucket *b;
 	const struct min_heap_callbacks bucket_max_cmp_callback = {
 		.less = new_bucket_max_cmp,
-		.swp = new_bucket_swap,
+		.swp = NULL,
 	};
 	const struct min_heap_callbacks bucket_min_cmp_callback = {
 		.less = new_bucket_min_cmp,
-		.swp = new_bucket_swap,
+		.swp = NULL,
 	};
 
 	ca->heap.nr = 0;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index bd97d8626887..68258a16e125 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1093,14 +1093,6 @@ static inline bool new_btree_iter_cmp(const void *l, const void *r, void __alway
 	return bkey_cmp(_l->k, _r->k) <= 0;
 }
 
-static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
-{
-	struct btree_iter_set *_iter1 = iter1;
-	struct btree_iter_set *_iter2 = iter2;
-
-	swap(*_iter1, *_iter2);
-}
-
 static inline bool btree_iter_end(struct btree_iter *iter)
 {
 	return !iter->heap.nr;
@@ -1111,7 +1103,7 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
 {
 	const struct min_heap_callbacks callbacks = {
 		.less = new_btree_iter_cmp,
-		.swp = new_btree_iter_swap,
+		.swp = NULL,
 	};
 
 	if (k != end)
@@ -1157,7 +1149,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
 	struct bkey *ret = NULL;
 	const struct min_heap_callbacks callbacks = {
 		.less = cmp,
-		.swp = new_btree_iter_swap,
+		.swp = NULL,
 	};
 
 	if (!btree_iter_end(iter)) {
@@ -1231,7 +1223,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
 		: bch_ptr_invalid;
 	const struct min_heap_callbacks callbacks = {
 		.less = b->ops->sort_cmp,
-		.swp = new_btree_iter_swap,
+		.swp = NULL,
 	};
 
 	/* Heapify the iterator, using our comparison function */
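The bcache hunks above all drop their hand-rolled pointer-swap callbacks and pass .swp = NULL, letting the min_heap code fall back to its built-in default element swap. A minimal, self-contained sketch of a comparator-only heap in that style (the heap type and names here are illustrative, not from the patch):

#include <linux/min_heap.h>

/* A heap of plain ints; DEFINE_MIN_HEAP generates the container type. */
DEFINE_MIN_HEAP(int, int_min_heap);

static bool int_less(const void *l, const void *r, void *args)
{
	return *(const int *)l < *(const int *)r;
}

static const struct min_heap_callbacks int_heap_cb = {
	.less = int_less,
	.swp = NULL,	/* NULL selects the built-in default swap */
};

The comparator signature matches the ones visible in the hunks above; once the default swap is available, only .less needs to be supplied.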
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index a7221e5dbe81..4b84fda1530a 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -266,20 +266,12 @@ static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_
 	return !(c ? c > 0 : _l->k < _r->k);
 }
 
-static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
-{
-	struct btree_iter_set *_iter1 = iter1;
-	struct btree_iter_set *_iter2 = iter2;
-
-	swap(*_iter1, *_iter2);
-}
-
 static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
 					  struct bkey *tmp)
 {
 	const struct min_heap_callbacks callbacks = {
 		.less = new_bch_extent_sort_cmp,
-		.swp = new_btree_iter_swap,
+		.swp = NULL,
 	};
 
 	while (iter->heap.nr > 1) {
 		struct btree_iter_set *top = iter->heap.data, *i = top + 1;
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 7f482729c56d..ef6abf33f926 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -190,14 +190,6 @@ static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *a
 	return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r);
 }
 
-static void new_bucket_swap(void *l, void *r, void __always_unused *args)
-{
-	struct bucket **_l = l;
-	struct bucket **_r = r;
-
-	swap(*_l, *_r);
-}
-
 static unsigned int bucket_heap_top(struct cache *ca)
 {
 	struct bucket *b;
@@ -212,7 +204,7 @@ void bch_moving_gc(struct cache_set *c)
 	unsigned long sectors_to_move, reserve_sectors;
 	const struct min_heap_callbacks callbacks = {
 		.less = new_bucket_cmp,
-		.swp = new_bucket_swap,
+		.swp = NULL,
 	};
 
 	if (!c->copy_gc_enabled)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e7abfdd77c3b..e42f1400cea9 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1718,7 +1718,7 @@ static CLOSURE_CALLBACK(cache_set_flush)
 	if (!IS_ERR_OR_NULL(c->gc_thread))
 		kthread_stop(c->gc_thread);
 
-	if (!IS_ERR(c->root))
+	if (!IS_ERR_OR_NULL(c->root))
 		list_add(&c->root->list, &c->btree_cache);
 
 	/*
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index bca0f39e15b8..b4d1c4329df3 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -198,15 +198,6 @@ int dm_bio_detain(struct dm_bio_prison *prison,
 }
 EXPORT_SYMBOL_GPL(dm_bio_detain);
 
-int dm_get_cell(struct dm_bio_prison *prison,
-		struct dm_cell_key *key,
-		struct dm_bio_prison_cell *cell_prealloc,
-		struct dm_bio_prison_cell **cell_result)
-{
-	return bio_detain(prison, key, NULL, cell_prealloc, cell_result);
-}
-EXPORT_SYMBOL_GPL(dm_get_cell);
-
 /*
  * @inmates must have been initialised prior to this call
  */
@@ -288,32 +279,6 @@ void dm_cell_visit_release(struct dm_bio_prison *prison,
 }
 EXPORT_SYMBOL_GPL(dm_cell_visit_release);
 
-static int __promote_or_release(struct rb_root *root,
-				struct dm_bio_prison_cell *cell)
-{
-	if (bio_list_empty(&cell->bios)) {
-		rb_erase(&cell->node, root);
-		return 1;
-	}
-
-	cell->holder = bio_list_pop(&cell->bios);
-	return 0;
-}
-
-int dm_cell_promote_or_release(struct dm_bio_prison *prison,
-			       struct dm_bio_prison_cell *cell)
-{
-	int r;
-	unsigned l = lock_nr(&cell->key, prison->num_locks);
-
-	spin_lock_irq(&prison->regions[l].lock);
-	r = __promote_or_release(&prison->regions[l].cell, cell);
-	spin_unlock_irq(&prison->regions[l].lock);
-
-	return r;
-}
-EXPORT_SYMBOL_GPL(dm_cell_promote_or_release);
-
 /*----------------------------------------------------------------*/
 
 #define DEFERRED_SET_SIZE 64
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index 2a097ed0d85e..d39706c48447 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -73,17 +73,6 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
 			     struct dm_bio_prison_cell *cell);
 
 /*
- * Creates, or retrieves a cell that overlaps the given key.
- *
- * Returns 1 if pre-existing cell returned, zero if new cell created using
- * @cell_prealloc.
- */
-int dm_get_cell(struct dm_bio_prison *prison,
-		struct dm_cell_key *key,
-		struct dm_bio_prison_cell *cell_prealloc,
-		struct dm_bio_prison_cell **cell_result);
-
-/*
  * Returns false if key is beyond BIO_PRISON_MAX_RANGE or spans a boundary.
  */
 bool dm_cell_key_has_valid_range(struct dm_cell_key *key);
@@ -117,19 +106,6 @@ void dm_cell_visit_release(struct dm_bio_prison *prison,
 			   void (*visit_fn)(void *, struct dm_bio_prison_cell *),
 			   void *context, struct dm_bio_prison_cell *cell);
 
-/*
- * Rather than always releasing the prisoners in a cell, the client may
- * want to promote one of them to be the new holder. There is a race here
- * though between releasing an empty cell, and other threads adding new
- * inmates. So this function makes the decision with its lock held.
- *
- * This function can have two outcomes:
- * i) An inmate is promoted to be the holder of the cell (return value of 0).
- * ii) The cell has no inmate for promotion and is released (return value of 1).
- */
-int dm_cell_promote_or_release(struct dm_bio_prison *prison,
-			       struct dm_bio_prison_cell *cell);
-
 /*----------------------------------------------------------------*/
 
 /*
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index d478aafa02c9..aab8240429b0 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -318,9 +318,10 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
  */
 enum data_mode {
 	DATA_MODE_SLAB = 0,
-	DATA_MODE_GET_FREE_PAGES = 1,
-	DATA_MODE_VMALLOC = 2,
-	DATA_MODE_LIMIT = 3
+	DATA_MODE_KMALLOC = 1,
+	DATA_MODE_GET_FREE_PAGES = 2,
+	DATA_MODE_VMALLOC = 3,
+	DATA_MODE_LIMIT = 4
 };
 
 struct dm_buffer {
@@ -1062,6 +1063,7 @@ static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 
 static unsigned long dm_bufio_peak_allocated;
 static unsigned long dm_bufio_allocated_kmem_cache;
+static unsigned long dm_bufio_allocated_kmalloc;
 static unsigned long dm_bufio_allocated_get_free_pages;
 static unsigned long dm_bufio_allocated_vmalloc;
 static unsigned long dm_bufio_current_allocated;
@@ -1104,6 +1106,7 @@ static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
 
 	static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
 		&dm_bufio_allocated_kmem_cache,
+		&dm_bufio_allocated_kmalloc,
 		&dm_bufio_allocated_get_free_pages,
 		&dm_bufio_allocated_vmalloc,
 	};
@@ -1181,6 +1184,11 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 		return kmem_cache_alloc(c->slab_cache, gfp_mask);
 	}
 
+	if (unlikely(c->block_size < PAGE_SIZE)) {
+		*data_mode = DATA_MODE_KMALLOC;
+		return kmalloc(c->block_size, gfp_mask | __GFP_RECLAIMABLE);
+	}
+
 	if (c->block_size <= KMALLOC_MAX_SIZE &&
 	    gfp_mask & __GFP_NORETRY) {
 		*data_mode = DATA_MODE_GET_FREE_PAGES;
@@ -1204,6 +1212,10 @@ static void free_buffer_data(struct dm_bufio_client *c,
 		kmem_cache_free(c->slab_cache, data);
 		break;
 
+	case DATA_MODE_KMALLOC:
+		kfree(data);
+		break;
+
 	case DATA_MODE_GET_FREE_PAGES:
 		free_pages((unsigned long)data,
 			   c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
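The new DATA_MODE_KMALLOC case routes sub-page buffers through plain kmalloc instead of get_free_pages or a dedicated slab, and free_buffer_data() must mirror it with kfree(). A condensed sketch of just that pairing, under the assumption that only the sub-page path matters (the real functions also handle the slab, page and vmalloc modes shown above):

/* Sketch: pick kmalloc for blocks smaller than a page; the
 * __GFP_RECLAIMABLE hint groups the memory with reclaimable slabs. */
static void *small_buf_alloc(size_t block_size, gfp_t gfp, enum data_mode *mode)
{
	if (block_size < PAGE_SIZE) {
		*mode = DATA_MODE_KMALLOC;
		return kmalloc(block_size, gfp | __GFP_RECLAIMABLE);
	}
	return NULL;	/* caller falls through to the other modes */
}

static void small_buf_free(void *data, enum data_mode mode)
{
	if (mode == DATA_MODE_KMALLOC)
		kfree(data);	/* must match the kmalloc() above */
}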
512b", __func__); @@ -2518,11 +2531,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign goto bad_dm_io; } - if (block_size <= KMALLOC_MAX_SIZE && - (block_size < PAGE_SIZE || !is_power_of_2(block_size))) { + if (block_size <= KMALLOC_MAX_SIZE && !is_power_of_2(block_size)) { unsigned int align = min(1U << __ffs(block_size), (unsigned int)PAGE_SIZE); - snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u", block_size); + snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u-%u", + block_size, atomic_inc_return(&seqno)); c->slab_cache = kmem_cache_create(slab_name, block_size, align, SLAB_RECLAIM_ACCOUNT, NULL); if (!c->slab_cache) { @@ -2531,9 +2544,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign } } if (aux_size) - snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", aux_size); + snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u-%u", + aux_size, atomic_inc_return(&seqno)); else - snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer"); + snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", + atomic_inc_return(&seqno)); c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size, 0, SLAB_RECLAIM_ACCOUNT, NULL); if (!c->slab_buffer) { @@ -2898,6 +2913,7 @@ static int __init dm_bufio_init(void) __u64 mem; dm_bufio_allocated_kmem_cache = 0; + dm_bufio_allocated_kmalloc = 0; dm_bufio_allocated_get_free_pages = 0; dm_bufio_allocated_vmalloc = 0; dm_bufio_current_allocated = 0; @@ -2986,6 +3002,9 @@ MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory"); module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, 0444); MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc"); +module_param_named(allocated_kmalloc_bytes, dm_bufio_allocated_kmalloc, ulong, 0444); +MODULE_PARM_DESC(allocated_kmalloc_bytes, "Memory allocated with kmalloc_alloc"); + module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, 0444); MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages"); diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c index 9c5308298cf1..b4165f172d62 100644 --- a/drivers/md/dm-cache-background-tracker.c +++ b/drivers/md/dm-cache-background-tracker.c @@ -11,12 +11,6 @@ #define DM_MSG_PREFIX "dm-background-tracker" -struct bt_work { - struct list_head list; - struct rb_node node; - struct policy_work work; -}; - struct background_tracker { unsigned int max_work; atomic_t pending_promotes; @@ -26,10 +20,10 @@ struct background_tracker { struct list_head issued; struct list_head queued; struct rb_root pending; - - struct kmem_cache *work_cache; }; +struct kmem_cache *btracker_work_cache = NULL; + struct background_tracker *btracker_create(unsigned int max_work) { struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL); @@ -48,12 +42,6 @@ struct background_tracker *btracker_create(unsigned int max_work) INIT_LIST_HEAD(&b->queued); b->pending = RB_ROOT; - b->work_cache = KMEM_CACHE(bt_work, 0); - if (!b->work_cache) { - DMERR("couldn't create mempool for background work items"); - kfree(b); - b = NULL; - } return b; } @@ -66,10 +54,9 @@ void btracker_destroy(struct background_tracker *b) BUG_ON(!list_empty(&b->issued)); list_for_each_entry_safe (w, tmp, &b->queued, list) { list_del(&w->list); - kmem_cache_free(b->work_cache, w); + kmem_cache_free(btracker_work_cache, w); } - 
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
index 9c5308298cf1..b4165f172d62 100644
--- a/drivers/md/dm-cache-background-tracker.c
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -11,12 +11,6 @@
 
 #define DM_MSG_PREFIX "dm-background-tracker"
 
-struct bt_work {
-	struct list_head list;
-	struct rb_node node;
-	struct policy_work work;
-};
-
 struct background_tracker {
 	unsigned int max_work;
 	atomic_t pending_promotes;
@@ -26,10 +20,10 @@ struct background_tracker {
 	struct list_head issued;
 	struct list_head queued;
 	struct rb_root pending;
-
-	struct kmem_cache *work_cache;
 };
 
+struct kmem_cache *btracker_work_cache = NULL;
+
 struct background_tracker *btracker_create(unsigned int max_work)
 {
 	struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
@@ -48,12 +42,6 @@ struct background_tracker *btracker_create(unsigned int max_work)
 	INIT_LIST_HEAD(&b->queued);
 	b->pending = RB_ROOT;
 
-	b->work_cache = KMEM_CACHE(bt_work, 0);
-	if (!b->work_cache) {
-		DMERR("couldn't create mempool for background work items");
-		kfree(b);
-		b = NULL;
-	}
 
 	return b;
 }
@@ -66,10 +54,9 @@ void btracker_destroy(struct background_tracker *b)
 	BUG_ON(!list_empty(&b->issued));
 	list_for_each_entry_safe (w, tmp, &b->queued, list) {
 		list_del(&w->list);
-		kmem_cache_free(b->work_cache, w);
+		kmem_cache_free(btracker_work_cache, w);
 	}
 
-	kmem_cache_destroy(b->work_cache);
 	kfree(b);
 }
 EXPORT_SYMBOL_GPL(btracker_destroy);
@@ -156,12 +143,6 @@ static void update_stats(struct background_tracker *b, struct policy_work *w, in
 	}
 }
 
-unsigned int btracker_nr_writebacks_queued(struct background_tracker *b)
-{
-	return atomic_read(&b->pending_writebacks);
-}
-EXPORT_SYMBOL_GPL(btracker_nr_writebacks_queued);
-
 unsigned int btracker_nr_demotions_queued(struct background_tracker *b)
 {
 	return atomic_read(&b->pending_demotes);
@@ -180,7 +161,7 @@ static struct bt_work *alloc_work(struct background_tracker *b)
 	if (max_work_reached(b))
 		return NULL;
 
-	return kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
+	return kmem_cache_alloc(btracker_work_cache, GFP_NOWAIT);
 }
 
 int btracker_queue(struct background_tracker *b,
@@ -203,7 +184,7 @@ int btracker_queue(struct background_tracker *b,
 		 * There was a race, we'll just ignore this second
 		 * bit of work for the same oblock.
 		 */
-		kmem_cache_free(b->work_cache, w);
+		kmem_cache_free(btracker_work_cache, w);
 		return -EINVAL;
 	}
 
@@ -244,7 +225,7 @@ void btracker_complete(struct background_tracker *b,
 	update_stats(b, &w->work, -1);
 	rb_erase(&w->node, &b->pending);
 	list_del(&w->list);
-	kmem_cache_free(b->work_cache, w);
+	kmem_cache_free(btracker_work_cache, w);
 }
 EXPORT_SYMBOL_GPL(btracker_complete);
diff --git a/drivers/md/dm-cache-background-tracker.h b/drivers/md/dm-cache-background-tracker.h
index 5b8f5c667b81..47156c14a44a 100644
--- a/drivers/md/dm-cache-background-tracker.h
+++ b/drivers/md/dm-cache-background-tracker.h
@@ -26,6 +26,14 @@
  * protected with a spinlock.
  */
 
+struct bt_work {
+	struct list_head list;
+	struct rb_node node;
+	struct policy_work work;
+};
+
+extern struct kmem_cache *btracker_work_cache;
+
 struct background_work;
 struct background_tracker;
 
@@ -42,7 +50,6 @@ struct background_tracker *btracker_create(unsigned int max_work);
  */
 void btracker_destroy(struct background_tracker *b);
 
-unsigned int btracker_nr_writebacks_queued(struct background_tracker *b);
 unsigned int btracker_nr_demotions_queued(struct background_tracker *b);
 
 /*
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 24cd87fddf75..a9a1ab284076 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1218,15 +1218,6 @@ int dm_cache_load_discards(struct dm_cache_metadata *cmd,
 	return r;
 }
 
-int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
-{
-	READ_LOCK(cmd);
-	*result = cmd->cache_blocks;
-	READ_UNLOCK(cmd);
-
-	return 0;
-}
-
 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
 {
 	int r;
@@ -1507,30 +1498,6 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
 	return r;
 }
 
-static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
-{
-	__le64 value;
-	dm_oblock_t oblock;
-	unsigned int flags;
-
-	memcpy(&value, leaf, sizeof(value));
-	unpack_value(value, &oblock, &flags);
-
-	return 0;
-}
-
-static int __dump_mappings(struct dm_cache_metadata *cmd)
-{
-	return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
-}
-
-void dm_cache_dump(struct dm_cache_metadata *cmd)
-{
-	READ_LOCK_VOID(cmd);
-	__dump_mappings(cmd);
-	READ_UNLOCK(cmd);
-}
-
 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
 {
 	int r;
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 57afc7047947..5f77890207fe 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -71,7 +71,6 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd);
  * origin blocks to map to.
  */
 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size);
-int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result);
 
 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
 				   sector_t discard_block_size,
@@ -123,8 +122,6 @@ int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
 				   dm_block_t *result);
 
-void dm_cache_dump(struct dm_cache_metadata *cmd);
-
 /*
  * The policy is invited to save a 32bit hint value for every cblock (eg,
  * for a hit count). These are stored against the policy name. If
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 40709310e327..9cb797a561d6 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -10,6 +10,7 @@
 #include "dm-bio-record.h"
 #include "dm-cache-metadata.h"
 #include "dm-io-tracker.h"
+#include "dm-cache-background-tracker.h"
 
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
@@ -2263,7 +2264,7 @@ static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
 
 /*----------------------------------------------------------------*/
 
-static struct kmem_cache *migration_cache;
+static struct kmem_cache *migration_cache = NULL;
 
 #define NOT_CORE_OPTION 1
 
@@ -3361,7 +3362,7 @@ static int cache_iterate_devices(struct dm_target *ti,
 static void disable_passdown_if_not_supported(struct cache *cache)
 {
 	struct block_device *origin_bdev = cache->origin_dev->bdev;
-	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+	struct queue_limits *origin_limits = bdev_limits(origin_bdev);
 	const char *reason = NULL;
 
 	if (!cache->features.discard_passdown)
@@ -3383,7 +3384,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
 static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
 {
 	struct block_device *origin_bdev = cache->origin_dev->bdev;
-	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+	struct queue_limits *origin_limits = bdev_limits(origin_bdev);
 
 	if (!cache->features.discard_passdown) {
 		/* No passdown is done so setting own virtual limits */
@@ -3445,22 +3446,36 @@ static int __init dm_cache_init(void)
 	int r;
 
 	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
-	if (!migration_cache)
-		return -ENOMEM;
+	if (!migration_cache) {
+		r = -ENOMEM;
+		goto err;
+	}
+
+	btracker_work_cache = kmem_cache_create("dm_cache_bt_work",
+		sizeof(struct bt_work), __alignof__(struct bt_work), 0, NULL);
+	if (!btracker_work_cache) {
+		r = -ENOMEM;
+		goto err;
+	}
 
 	r = dm_register_target(&cache_target);
 	if (r) {
-		kmem_cache_destroy(migration_cache);
-		return r;
+		goto err;
 	}
 
 	return 0;
+
+err:
+	kmem_cache_destroy(migration_cache);
+	kmem_cache_destroy(btracker_work_cache);
+	return r;
 }
 
 static void __exit dm_cache_exit(void)
 {
 	dm_unregister_target(&cache_target);
 	kmem_cache_destroy(migration_cache);
+	kmem_cache_destroy(btracker_work_cache);
 }
 
 module_init(dm_cache_init);
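dm_cache_init() now owns two module-global caches and funnels every failure through a single err: label. This works because kmem_cache_destroy() is a no-op when passed NULL, so no bookkeeping is needed to know which allocations succeeded. The shape of the pattern, with hypothetical types and names:

struct item_a { int a; };
struct item_b { int b; };

static struct kmem_cache *cache_a;
static struct kmem_cache *cache_b;

static int __init example_init(void)
{
	int r = -ENOMEM;

	cache_a = KMEM_CACHE(item_a, 0);
	if (!cache_a)
		goto err;

	cache_b = KMEM_CACHE(item_b, 0);
	if (!cache_b)
		goto err;

	return 0;

err:
	kmem_cache_destroy(cache_a);	/* safe even when NULL */
	kmem_cache_destroy(cache_b);
	return r;
}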
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 12bbe487a4c8..e956d980672c 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2020,7 +2020,7 @@ static void clone_resume(struct dm_target *ti)
 static void disable_passdown_if_not_supported(struct clone *clone)
 {
 	struct block_device *dest_dev = clone->dest_dev->bdev;
-	struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits;
+	struct queue_limits *dest_limits = bdev_limits(dest_dev);
 	const char *reason = NULL;
 
 	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
@@ -2041,7 +2041,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
 static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
 {
 	struct block_device *dest_bdev = clone->dest_dev->bdev;
-	struct queue_limits *dest_limits = &bdev_get_queue(dest_bdev)->limits;
+	struct queue_limits *dest_limits = bdev_limits(dest_bdev);
 
 	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) {
 		/* No passdown is done so we set our own virtual limits */
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index ec5db1478b2f..18ae45dcbfb2 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -442,7 +442,7 @@ static int ebs_iterate_devices(struct dm_target *ti,
 static struct target_type ebs_target = {
 	.name		 = "ebs",
 	.version	 = {1, 0, 1},
-	.features	 = DM_TARGET_PASSES_INTEGRITY,
+	.features	 = 0,
 	.module		 = THIS_MODULE,
 	.ctr		 = ebs_ctr,
 	.dtr		 = ebs_dtr,
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index f299ff393a6a..d42eac944eb5 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1912,7 +1912,7 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user,
 
 	if ((kernel_params->version[0] != DM_VERSION_MAJOR) ||
 	    (kernel_params->version[1] > DM_VERSION_MINOR)) {
-		DMERR("ioctl interface mismatch: kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
+		DMERR_LIMIT("ioctl interface mismatch: kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
 		      DM_VERSION_MAJOR, DM_VERSION_MINOR,
 		      DM_VERSION_PATCHLEVEL,
 		      kernel_params->version[0],
@@ -1961,7 +1961,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
 	if (unlikely(param_kernel->data_size < minimum_data_size) ||
 	    unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) {
-		DMERR("Invalid data size in the ioctl structure: %u",
+		DMERR_LIMIT("Invalid data size in the ioctl structure: %u",
 		      param_kernel->data_size);
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index dbd39b9722b9..bd8b796ae683 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1033,11 +1033,6 @@ struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
 	return NULL;
 }
 
-bool dm_table_bio_based(struct dm_table *t)
-{
-	return __table_type_bio_based(dm_table_get_type(t));
-}
-
 bool dm_table_request_based(struct dm_table *t)
 {
 	return __table_type_request_based(dm_table_get_type(t));
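Several targets in this series replace the open-coded &bdev_get_queue(bdev)->limits with the bdev_limits() accessor. Judging by the call sites, the helper is a thin block-layer wrapper, presumably along these lines:

/* Assumed shape of the helper (see <linux/blkdev.h>); it hides the
 * request_queue dereference behind a single accessor. */
static inline struct queue_limits *bdev_limits(struct block_device *bdev)
{
	return &bdev_get_queue(bdev)->limits;
}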
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 89632ce97760..05cf4e3f2bbe 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2332,10 +2332,9 @@ static struct thin_c *get_first_thin(struct pool *pool)
 	struct thin_c *tc = NULL;
 
 	rcu_read_lock();
-	if (!list_empty(&pool->active_thins)) {
-		tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+	tc = list_first_or_null_rcu(&pool->active_thins, struct thin_c, list);
+	if (tc)
 		thin_get(tc);
-	}
 	rcu_read_unlock();
 
 	return tc;
@@ -2484,6 +2483,7 @@ static void pool_work_wait(struct pool_work *pw, struct pool *pool,
 	init_completion(&pw->complete);
 	queue_work(pool->wq, &pw->worker);
 	wait_for_completion(&pw->complete);
+	destroy_work_on_stack(&pw->worker);
 }
 
 /*----------------------------------------------------------------*/
@@ -2842,7 +2842,7 @@ static void disable_discard_passdown_if_not_supported(struct pool_c *pt)
 {
 	struct pool *pool = pt->pool;
 	struct block_device *data_bdev = pt->data_dev->bdev;
-	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
+	struct queue_limits *data_limits = bdev_limits(data_bdev);
 	const char *reason = NULL;
 
 	if (!pt->adjusted_pf.discard_passdown)
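pool_work_wait() queues a work_struct that lives on the caller's stack; the added destroy_work_on_stack() pairs with INIT_WORK_ONSTACK() so the debug-objects machinery is told the object is dead before the stack frame is reused (md-bitmap.c receives the same fix further down). The complete pattern as a self-contained sketch:

/* Sketch: synchronous hand-off to a workqueue via an on-stack work
 * item, bracketed for CONFIG_DEBUG_OBJECTS_WORK. */
struct sync_work {
	struct work_struct worker;
	struct completion done;
};

static void sync_work_fn(struct work_struct *ws)
{
	struct sync_work *sw = container_of(ws, struct sync_work, worker);

	/* ... run on the workqueue thread ... */
	complete(&sw->done);
}

static void run_on_wq(struct workqueue_struct *wq)
{
	struct sync_work sw;

	INIT_WORK_ONSTACK(&sw.worker, sync_work_fn);
	init_completion(&sw.done);
	queue_work(wq, &sw.worker);
	wait_for_completion(&sw.done);
	destroy_work_on_stack(&sw.worker);	/* the added step */
}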
diff --git a/drivers/md/dm-vdo/Kconfig b/drivers/md/dm-vdo/Kconfig
index 111ecd2c2a24..2400b2bc4bc7 100644
--- a/drivers/md/dm-vdo/Kconfig
+++ b/drivers/md/dm-vdo/Kconfig
@@ -7,6 +7,7 @@ config DM_VDO
 	select DM_BUFIO
 	select LZ4_COMPRESS
 	select LZ4_DECOMPRESS
+	select MIN_HEAP
 	help
 	  This device mapper target presents a block device with
 	  deduplication, compression and thin-provisioning.
diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c
index a0a7c1bd634e..89cb7942ec5c 100644
--- a/drivers/md/dm-vdo/block-map.c
+++ b/drivers/md/dm-vdo/block-map.c
@@ -209,8 +209,6 @@ static int initialize_info(struct vdo_page_cache *cache)
 /**
  * allocate_cache_components() - Allocate components of the cache which require their own
  *                               allocation.
- * @maximum_age: The number of journal blocks before a dirtied page is considered old and must be
- *               written out.
  *
  * The caller is responsible for all clean up on errors.
  *
diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c
index 0d502f6a86ad..810002747091 100644
--- a/drivers/md/dm-vdo/data-vio.c
+++ b/drivers/md/dm-vdo/data-vio.c
@@ -327,8 +327,9 @@ static u32 __must_check pack_status(struct data_vio_compression_status status)
 
 /**
  * set_data_vio_compression_status() - Set the compression status of a data_vio.
- * @state: The expected current status of the data_vio.
- * @new_state: The status to set.
+ * @data_vio: The data_vio to change.
+ * @status: The expected current status of the data_vio.
+ * @new_status: The status to set.
  *
  * Return: true if the new status was set, false if the data_vio's compression status did not
  *         match the expected state, and so was left unchanged.
@@ -836,7 +837,7 @@ static void destroy_data_vio(struct data_vio *data_vio)
  * @vdo: The vdo to which the pool will belong.
  * @pool_size: The number of data_vios in the pool.
  * @discard_limit: The maximum number of data_vios which may be used for discards.
- * @pool: A pointer to hold the newly allocated pool.
+ * @pool_ptr: A pointer to hold the newly allocated pool.
 */
 int make_data_vio_pool(struct vdo *vdo, data_vio_count_t pool_size,
		       data_vio_count_t discard_limit, struct data_vio_pool **pool_ptr)
@@ -1074,35 +1075,6 @@ void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios)
 	spin_unlock(&pool->lock);
 }
 
-data_vio_count_t get_data_vio_pool_active_discards(struct data_vio_pool *pool)
-{
-	return READ_ONCE(pool->discard_limiter.busy);
-}
-
-data_vio_count_t get_data_vio_pool_discard_limit(struct data_vio_pool *pool)
-{
-	return READ_ONCE(pool->discard_limiter.limit);
-}
-
-data_vio_count_t get_data_vio_pool_maximum_discards(struct data_vio_pool *pool)
-{
-	return READ_ONCE(pool->discard_limiter.max_busy);
-}
-
-int set_data_vio_pool_discard_limit(struct data_vio_pool *pool, data_vio_count_t limit)
-{
-	if (get_data_vio_pool_request_limit(pool) < limit) {
-		// The discard limit may not be higher than the data_vio limit.
-		return -EINVAL;
-	}
-
-	spin_lock(&pool->lock);
-	pool->discard_limiter.limit = limit;
-	spin_unlock(&pool->lock);
-
-	return VDO_SUCCESS;
-}
-
 data_vio_count_t get_data_vio_pool_active_requests(struct data_vio_pool *pool)
 {
 	return READ_ONCE(pool->limiter.busy);
diff --git a/drivers/md/dm-vdo/data-vio.h b/drivers/md/dm-vdo/data-vio.h
index 25926b6cd98b..067b983bb291 100644
--- a/drivers/md/dm-vdo/data-vio.h
+++ b/drivers/md/dm-vdo/data-vio.h
@@ -336,11 +336,6 @@ void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *comp
 void resume_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion);
 
 void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios);
-data_vio_count_t get_data_vio_pool_active_discards(struct data_vio_pool *pool);
-data_vio_count_t get_data_vio_pool_discard_limit(struct data_vio_pool *pool);
-data_vio_count_t get_data_vio_pool_maximum_discards(struct data_vio_pool *pool);
-int __must_check set_data_vio_pool_discard_limit(struct data_vio_pool *pool,
-						 data_vio_count_t limit);
 data_vio_count_t get_data_vio_pool_active_requests(struct data_vio_pool *pool);
 data_vio_count_t get_data_vio_pool_request_limit(struct data_vio_pool *pool);
 data_vio_count_t get_data_vio_pool_maximum_requests(struct data_vio_pool *pool);
diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c
index 80628ae93fba..b6f8e2dc7729 100644
--- a/drivers/md/dm-vdo/dedupe.c
+++ b/drivers/md/dm-vdo/dedupe.c
@@ -565,7 +565,7 @@ static void wait_on_hash_lock(struct hash_lock *lock, struct data_vio *data_vio)
  * @waiter: The data_vio's waiter link.
  * @context: Not used.
  */
-static void abort_waiter(struct vdo_waiter *waiter, void *context __always_unused)
+static void abort_waiter(struct vdo_waiter *waiter, void __always_unused *context)
 {
 	write_data_vio(vdo_waiter_as_data_vio(waiter));
 }
@@ -1727,7 +1727,7 @@ static void report_bogus_lock_state(struct hash_lock *lock, struct data_vio *dat
 /**
  * vdo_continue_hash_lock() - Continue the processing state after writing, compressing, or
  *                            deduplicating.
- * @data_vio: The data_vio to continue processing in its hash lock.
+ * @completion: The data_vio completion to continue processing in its hash lock.
  *
 * Asynchronously continue processing a data_vio in its hash lock after it has finished writing,
 * compressing, or deduplicating, so it can share the result with any data_vios waiting in the hash
@@ -1825,7 +1825,7 @@ static inline int assert_hash_lock_preconditions(const struct data_vio *data_vio
 /**
  * vdo_acquire_hash_lock() - Acquire or share a lock on a record name.
- * @data_vio: The data_vio acquiring a lock on its record name.
+ * @completion: The data_vio completion acquiring a lock on its record name.
 *
 * Acquire or share a lock on the hash (record name) of the data in a data_vio, updating the
 * data_vio to reference the lock. This must only be called in the correct thread for the zone. In
@@ -2679,7 +2679,8 @@ static void get_index_statistics(struct hash_zones *zones,
 /**
  * vdo_get_dedupe_statistics() - Tally the statistics from all the hash zones and the UDS index.
- * @hash_zones: The hash zones to query
+ * @zones: The hash zones to query
+ * @stats: A structure to store the statistics
 *
 * Return: The sum of the hash lock statistics from all hash zones plus the statistics from the UDS
 *         index
diff --git a/drivers/md/dm-vdo/encodings.c b/drivers/md/dm-vdo/encodings.c
index a34ea0229d53..100e92f8f866 100644
--- a/drivers/md/dm-vdo/encodings.c
+++ b/drivers/md/dm-vdo/encodings.c
@@ -858,7 +858,7 @@ static int __must_check make_partition(struct layout *layout, enum partition_id
 /**
  * vdo_initialize_layout() - Lay out the partitions of a vdo.
  * @size: The entire size of the vdo.
- * @origin: The start of the layout on the underlying storage in blocks.
+ * @offset: The start of the layout on the underlying storage in blocks.
  * @block_map_blocks: The size of the block map partition.
  * @journal_blocks: The size of the journal partition.
  * @summary_blocks: The size of the slab summary partition.
diff --git a/drivers/md/dm-vdo/indexer/index-layout.c b/drivers/md/dm-vdo/indexer/index-layout.c
index 627adc24af3b..af8fab83b0f3 100644
--- a/drivers/md/dm-vdo/indexer/index-layout.c
+++ b/drivers/md/dm-vdo/indexer/index-layout.c
@@ -248,32 +248,6 @@ static int __must_check compute_sizes(const struct uds_configuration *config,
 	return UDS_SUCCESS;
 }
 
-int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
-{
-	int result;
-	struct uds_configuration *index_config;
-	struct save_layout_sizes sizes;
-
-	if (index_size == NULL) {
-		vdo_log_error("Missing output size pointer");
-		return -EINVAL;
-	}
-
-	result = uds_make_configuration(parameters, &index_config);
-	if (result != UDS_SUCCESS) {
-		vdo_log_error_strerror(result, "cannot compute index size");
-		return uds_status_to_errno(result);
-	}
-
-	result = compute_sizes(index_config, &sizes);
-	uds_free_configuration(index_config);
-	if (result != UDS_SUCCESS)
-		return uds_status_to_errno(result);
-
-	*index_size = sizes.total_size;
-	return UDS_SUCCESS;
-}
-
 /* Create unique data using the current time and a pseudorandom number. */
 static void create_unique_nonce_data(u8 *buffer)
 {
diff --git a/drivers/md/dm-vdo/indexer/indexer.h b/drivers/md/dm-vdo/indexer/indexer.h
index 3744aaf625b0..183a94eb7e92 100644
--- a/drivers/md/dm-vdo/indexer/indexer.h
+++ b/drivers/md/dm-vdo/indexer/indexer.h
@@ -283,10 +283,6 @@ struct uds_request {
 	enum uds_index_region location;
 };
 
-/* Compute the number of bytes needed to store an index. */
-int __must_check uds_compute_index_size(const struct uds_parameters *parameters,
-					u64 *index_size);
-
 /* A session is required for most index operations. */
 int __must_check uds_create_index_session(struct uds_index_session **session);
diff --git a/drivers/md/dm-vdo/int-map.c b/drivers/md/dm-vdo/int-map.c
index f6fe58e437b3..aeb690415dbd 100644
--- a/drivers/md/dm-vdo/int-map.c
+++ b/drivers/md/dm-vdo/int-map.c
@@ -70,7 +70,7 @@
 * it's crucial to keep the hop fields near the buckets that they use them so they'll tend to share
 * cache lines.
 */
-struct __packed bucket {
+struct bucket {
 	/**
 	 * @first_hop: The biased offset of the first entry in the hop list of the neighborhood
 	 *             that hashes to this bucket.
@@ -82,7 +82,7 @@ struct __packed bucket {
 	u64 key;
 	/** @value: The value stored in this bucket (NULL if empty). */
 	void *value;
-};
+} __packed;
 
 /**
 * struct int_map - The concrete definition of the opaque int_map type.
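The int-map hunk above only moves the packed attribute from between the struct keyword and the tag to after the closing brace, the placement used throughout the kernel; the layout itself is unchanged. In miniature:

/* Before: attribute between keyword and tag. */
struct __packed example_v1 { u8 flag; u64 key; };

/* After: conventional kernel placement, applying to the whole type. */
struct example_v2 { u8 flag; u64 key; } __packed;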
@@ -310,7 +310,6 @@ static struct bucket *select_bucket(const struct int_map *map, u64 key)
 /**
 * search_hop_list() - Search the hop list associated with given hash bucket for a given search
 *                     key.
- * @map: The map being searched.
 * @bucket: The map bucket to search for the key.
 * @key: The mapping key.
 * @previous_ptr: Output. if not NULL, a pointer in which to store the bucket in the list preceding
@@ -321,9 +320,7 @@ static struct bucket *select_bucket(const struct int_map *map, u64 key)
 *
 * Return: An entry that matches the key, or NULL if not found.
 */
-static struct bucket *search_hop_list(struct int_map *map __always_unused,
-				      struct bucket *bucket,
-				      u64 key,
+static struct bucket *search_hop_list(struct bucket *bucket, u64 key,
 				      struct bucket **previous_ptr)
 {
 	struct bucket *previous = NULL;
@@ -357,7 +354,7 @@ static struct bucket *search_hop_list(struct int_map *map __always_unused,
 */
 void *vdo_int_map_get(struct int_map *map, u64 key)
 {
-	struct bucket *match = search_hop_list(map, select_bucket(map, key), key, NULL);
+	struct bucket *match = search_hop_list(select_bucket(map, key), key, NULL);
 
 	return ((match != NULL) ? match->value : NULL);
 }
@@ -443,7 +440,6 @@ find_empty_bucket(struct int_map *map, struct bucket *bucket, unsigned int max_p
 /**
 * move_empty_bucket() - Move an empty bucket closer to the start of the bucket array.
- * @map: The map containing the bucket.
 * @hole: The empty bucket to fill with an entry that precedes it in one of its enclosing
 *        neighborhoods.
 *
@@ -454,8 +450,7 @@ find_empty_bucket(struct int_map *map, struct bucket *bucket, unsigned int max_p
 * Return: The bucket that was vacated by moving its entry to the provided hole, or NULL if no
 *         entry could be moved.
 */
-static struct bucket *move_empty_bucket(struct int_map *map __always_unused,
-					struct bucket *hole)
+static struct bucket *move_empty_bucket(struct bucket *hole)
 {
 	/*
 	 * Examine every neighborhood that the empty bucket is part of, starting with the one in
@@ -516,7 +511,6 @@ static struct bucket *move_empty_bucket(struct int_map *map __always_unused,
 /**
 * update_mapping() - Find and update any existing mapping for a given key, returning the value
 *                    associated with the key in the provided pointer.
- * @map: The int_map to attempt to modify.
 * @neighborhood: The first bucket in the neighborhood that would contain the search key
 * @key: The key with which to associate the new value.
 * @new_value: The value to be associated with the key.
@@ -525,10 +519,10 @@ static struct bucket *move_empty_bucket(struct int_map *map __always_unused,
 *
 * Return: true if the map contains a mapping for the key, false if it does not.
 */
-static bool update_mapping(struct int_map *map, struct bucket *neighborhood,
-			   u64 key, void *new_value, bool update, void **old_value_ptr)
+static bool update_mapping(struct bucket *neighborhood, u64 key, void *new_value,
+			   bool update, void **old_value_ptr)
 {
-	struct bucket *bucket = search_hop_list(map, neighborhood, key, NULL);
+	struct bucket *bucket = search_hop_list(neighborhood, key, NULL);
 
 	if (bucket == NULL) {
 		/* There is no bucket containing the key in the neighborhood. */
@@ -584,7 +578,7 @@ static struct bucket *find_or_make_vacancy(struct int_map *map,
 		 * The nearest empty bucket isn't within the neighborhood that must contain the new
 		 * entry, so try to swap it with bucket that is closer.
 		 */
-		hole = move_empty_bucket(map, hole);
+		hole = move_empty_bucket(hole);
 	}
 
 	return NULL;
@@ -625,7 +619,7 @@ int vdo_int_map_put(struct int_map *map, u64 key, void *new_value, bool update,
 	 * Check whether the neighborhood already contains an entry for the key, in which case we
 	 * optionally update it, returning the old value.
 	 */
-	if (update_mapping(map, neighborhood, key, new_value, update, old_value_ptr))
+	if (update_mapping(neighborhood, key, new_value, update, old_value_ptr))
 		return VDO_SUCCESS;
 
 	/*
@@ -679,7 +673,7 @@ void *vdo_int_map_remove(struct int_map *map, u64 key)
 	/* Select the bucket to search and search it for an existing entry. */
 	struct bucket *bucket = select_bucket(map, key);
 	struct bucket *previous;
-	struct bucket *victim = search_hop_list(map, bucket, key, &previous);
+	struct bucket *victim = search_hop_list(bucket, key, &previous);
 
 	if (victim == NULL) {
 		/* There is no matching entry to remove. */
diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c
index ab62abe18827..421e5436c32c 100644
--- a/drivers/md/dm-vdo/io-submitter.c
+++ b/drivers/md/dm-vdo/io-submitter.c
@@ -367,7 +367,7 @@ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
 *                        completions.
 * @max_requests_active: Number of bios for merge tracking.
 * @vdo: The vdo which will use this submitter.
- * @io_submitter: pointer to the new data structure.
+ * @io_submitter_ptr: pointer to the new data structure.
 *
 * Return: VDO_SUCCESS or an error.
 */
diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c
index 13008b089206..b0b0587d85f3 100644
--- a/drivers/md/dm-vdo/murmurhash3.c
+++ b/drivers/md/dm-vdo/murmurhash3.c
@@ -44,14 +44,11 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
 	u64 *hash_out = out;
 
 	/* body */
-
-	const u64 *blocks = (const u64 *)(data);
-
 	int i;
 
 	for (i = 0; i < nblocks; i++) {
-		u64 k1 = get_unaligned_le64(&blocks[i * 2]);
-		u64 k2 = get_unaligned_le64(&blocks[i * 2 + 1]);
+		u64 k1 = get_unaligned_le64(&data[i * 16]);
+		u64 k2 = get_unaligned_le64(&data[i * 16 + 8]);
 
 		k1 *= c1;
 		k1 = ROTL64(k1, 31);
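The murmurhash change stops materializing a misaligned const u64 * and instead hands get_unaligned_le64() raw byte offsets, which sidesteps undefined pointer arithmetic on unaligned addresses and reads naturally as 16 bytes per block. The idiom on its own (hypothetical function):

/* Sketch: pull two little-endian 64-bit lanes from each 16-byte block
 * of an arbitrarily aligned buffer. */
static u64 fold_lanes(const u8 *data, int nblocks)
{
	u64 acc = 0;
	int i;

	for (i = 0; i < nblocks; i++) {
		/* &data[i * 16] need not be 8-byte aligned; that is fine */
		acc ^= get_unaligned_le64(&data[i * 16]);
		acc ^= get_unaligned_le64(&data[i * 16 + 8]);
	}

	return acc;
}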
diff --git a/drivers/md/dm-vdo/packer.c b/drivers/md/dm-vdo/packer.c
index 16cf29b4c90a..f70f5edabc10 100644
--- a/drivers/md/dm-vdo/packer.c
+++ b/drivers/md/dm-vdo/packer.c
@@ -250,7 +250,6 @@ static void abort_packing(struct data_vio *data_vio)
 /**
 * release_compressed_write_waiter() - Update a data_vio for which a successful compressed write
 *                                     has completed and send it on its way.
- * @data_vio: The data_vio to release.
 * @allocation: The allocation to which the compressed block was written.
 */
@@ -383,7 +382,7 @@ static void initialize_compressed_block(struct compressed_block *block, u16 size
 * @compression: The agent's compression_state to pack in to.
 * @data_vio: The data_vio to pack.
 * @offset: The offset into the compressed block at which to pack the fragment.
- * @compressed_block: The compressed block which will be written out when batch is fully packed.
+ * @block: The compressed block which will be written out when batch is fully packed.
 *
 * Return: The new amount of space used.
 */
diff --git a/drivers/md/dm-vdo/physical-zone.c b/drivers/md/dm-vdo/physical-zone.c
index 2fee3a7c1191..a43b5c45fab7 100644
--- a/drivers/md/dm-vdo/physical-zone.c
+++ b/drivers/md/dm-vdo/physical-zone.c
@@ -517,7 +517,7 @@ static int allocate_and_lock_block(struct allocation *allocation)
 * @waiter: The allocating_vio that was waiting to allocate.
 * @context: The context (unused).
 */
-static void retry_allocation(struct vdo_waiter *waiter, void *context __always_unused)
+static void retry_allocation(struct vdo_waiter *waiter, void __always_unused *context)
 {
 	struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);
diff --git a/drivers/md/dm-vdo/recovery-journal.c b/drivers/md/dm-vdo/recovery-journal.c
index ee6321a3e523..de58184f538f 100644
--- a/drivers/md/dm-vdo/recovery-journal.c
+++ b/drivers/md/dm-vdo/recovery-journal.c
@@ -1365,7 +1365,7 @@ static void add_queued_recovery_entries(struct recovery_journal_block *block)
 *
 * Implements waiter_callback_fn.
 */
-static void write_block(struct vdo_waiter *waiter, void *context __always_unused)
+static void write_block(struct vdo_waiter *waiter, void __always_unused *context)
 {
 	struct recovery_journal_block *block =
 		container_of(waiter, struct recovery_journal_block, write_waiter);
diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c
index ffff2c999518..8c006fb3afcf 100644
--- a/drivers/md/dm-vdo/repair.c
+++ b/drivers/md/dm-vdo/repair.c
@@ -166,7 +166,7 @@ static void swap_mappings(void *item1, void *item2, void __always_unused *args)
 
 static const struct min_heap_callbacks repair_min_heap = {
 	.less = mapping_is_less_than,
-	.swp = swap_mappings,
+	.swp = NULL,
 };
 
 static struct numbered_block_mapping *sort_next_heap_element(struct repair_completion *repair)
diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c
index 274f9ccd072f..8f0a35c63af6 100644
--- a/drivers/md/dm-vdo/slab-depot.c
+++ b/drivers/md/dm-vdo/slab-depot.c
@@ -1287,7 +1287,7 @@ static struct reference_block * __must_check get_reference_block(struct vdo_slab
 * slab_block_number_from_pbn() - Determine the index within the slab of a particular physical
 *                                block number.
 * @slab: The slab.
- * @physical_block_number: The physical block number.
+ * @pbn: The physical block number.
 * @slab_block_number_ptr: A pointer to the slab block number.
 *
 * Return: VDO_SUCCESS or an error code.
@@ -1459,7 +1459,6 @@ static int increment_for_data(struct vdo_slab *slab, struct reference_block *blo
 * @block_number: The block to update.
 * @old_status: The reference status of the data block before this decrement.
 * @updater: The reference updater doing this operation in case we need to look up the pbn lock.
- * @lock: The pbn_lock associated with the block being decremented (may be NULL).
 * @counter_ptr: A pointer to the count for the data block (in, out).
 * @adjust_block_count: Whether to update the allocator's free block count.
 *
@@ -3232,8 +3231,7 @@ int vdo_enqueue_clean_slab_waiter(struct block_allocator *allocator,
 /**
 * vdo_modify_reference_count() - Modify the reference count of a block by first making a slab
 *                                journal entry and then updating the reference counter.
-
- * @data_vio: The data_vio for which to add the entry.
+ * @completion: The data_vio completion for which to add the entry.
 * @updater: Which of the data_vio's reference updaters is being submitted.
 */
 void vdo_modify_reference_count(struct vdo_completion *completion,
@@ -3301,17 +3299,9 @@ static bool slab_status_is_less_than(const void *item1, const void *item2,
 	return info1->slab_number < info2->slab_number;
 }
 
-static void swap_slab_statuses(void *item1, void *item2, void __always_unused *args)
-{
-	struct slab_status *info1 = item1;
-	struct slab_status *info2 = item2;
-
-	swap(*info1, *info2);
-}
-
 static const struct min_heap_callbacks slab_status_min_heap = {
 	.less = slab_status_is_less_than,
-	.swp = swap_slab_statuses,
+	.swp = NULL,
 };
 
 /* Inform the slab actor that a action has finished on some slab; used by apply_to_slabs(). */
@@ -4750,8 +4740,7 @@ void vdo_use_new_slabs(struct slab_depot *depot, struct vdo_completion *parent)
 /**
 * stop_scrubbing() - Tell the scrubber to stop scrubbing after it finishes the slab it is
 *                    currently working on.
- * @scrubber: The scrubber to stop.
- * @parent: The completion to notify when scrubbing has stopped.
+ * @allocator: The block allocator owning the scrubber to stop.
 */
 static void stop_scrubbing(struct block_allocator *allocator)
 {
diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c
index fff847767755..a7e32baab4af 100644
--- a/drivers/md/dm-vdo/vdo.c
+++ b/drivers/md/dm-vdo/vdo.c
@@ -643,7 +643,7 @@ static void finish_vdo(struct vdo *vdo)
 /**
 * free_listeners() - Free the list of read-only listeners associated with a thread.
- * @thread_data: The thread holding the list to free.
+ * @thread: The thread holding the list to free.
 */
 static void free_listeners(struct vdo_thread *thread)
 {
@@ -852,7 +852,7 @@ int vdo_synchronous_flush(struct vdo *vdo)
 /**
 * vdo_get_state() - Get the current state of the vdo.
 * @vdo: The vdo.
-
+ *
 * Context: This method may be called from any thread.
 *
 * Return: The current state of the vdo.
diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c
index b291578f726f..e710f3c5a972 100644
--- a/drivers/md/dm-vdo/vio.c
+++ b/drivers/md/dm-vdo/vio.c
@@ -202,6 +202,7 @@ int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
 	if (data == NULL)
 		return VDO_SUCCESS;
 
+	bio->bi_ioprio = 0;
 	bio->bi_io_vec = bio->bi_inline_vecs;
 	bio->bi_max_vecs = vio->block_count + 1;
 	len = VDO_BLOCK_SIZE * vio->block_count;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 62b1a44b8dd2..e61855da6461 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -40,35 +40,23 @@ static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
 }
 
 /*
- * Decode an RS block using Reed-Solomon.
- */
-static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
-			  u8 *data, u8 *fec, int neras)
-{
-	int i;
-	uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
-
-	for (i = 0; i < v->fec->roots; i++)
-		par[i] = fec[i];
-
-	return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
-			  fio->erasures, 0, NULL);
-}
-
-/*
  * Read error-correcting codes for the requested RS block. Returns a pointer
  * to the data block. Caller is responsible for releasing buf.
  */
 static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
-			   unsigned int *offset, struct dm_buffer **buf,
-			   unsigned short ioprio)
+			   unsigned int *offset, unsigned int par_buf_offset,
+			   struct dm_buffer **buf, unsigned short ioprio)
 {
 	u64 position, block, rem;
 	u8 *res;
 
+	/* We have already part of parity bytes read, skip to the next block */
+	if (par_buf_offset)
+		index++;
+
 	position = (index + rsb) * v->fec->roots;
 	block = div64_u64_rem(position, v->fec->io_size, &rem);
-	*offset = (unsigned int)rem;
+	*offset = par_buf_offset ? 0 : (unsigned int)rem;
 
 	res = dm_bufio_read_with_ioprio(v->fec->bufio, block, buf, ioprio);
 	if (IS_ERR(res)) {
@@ -128,11 +116,13 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
 {
 	int r, corrected = 0, res;
 	struct dm_buffer *buf;
-	unsigned int n, i, offset;
+	unsigned int n, i, j, offset, par_buf_offset = 0;
+	uint16_t par_buf[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
 	u8 *par, *block;
 	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
 
-	par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
+	par = fec_read_parity(v, rsb, block_offset, &offset,
+			      par_buf_offset, &buf, bio_prio(bio));
 	if (IS_ERR(par))
 		return PTR_ERR(par);
 
@@ -142,7 +132,11 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
 	 */
 	fec_for_each_buffer_rs_block(fio, n, i) {
 		block = fec_buffer_rs_block(v, fio, n, i);
-		res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+		for (j = 0; j < v->fec->roots - par_buf_offset; j++)
+			par_buf[par_buf_offset + j] = par[offset + j];
+		/* Decode an RS block using Reed-Solomon */
+		res = decode_rs8(fio->rs, block, par_buf, v->fec->rsn,
+				 NULL, neras, fio->erasures, 0, NULL);
 		if (res < 0) {
 			r = res;
 			goto error;
@@ -155,12 +149,22 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
 		if (block_offset >= 1 << v->data_dev_block_bits)
 			goto done;
 
-		/* read the next block when we run out of parity bytes */
-		offset += v->fec->roots;
+		/* Read the next block when we run out of parity bytes */
+		offset += (v->fec->roots - par_buf_offset);
+		/* Check if parity bytes are split between blocks */
+		if (offset < v->fec->io_size && (offset + v->fec->roots) > v->fec->io_size) {
+			par_buf_offset = v->fec->io_size - offset;
+			for (j = 0; j < par_buf_offset; j++)
+				par_buf[j] = par[offset + j];
+			offset += par_buf_offset;
+		} else
+			par_buf_offset = 0;
+
 		if (offset >= v->fec->io_size) {
 			dm_bufio_release(buf);
 
-			par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
+			par = fec_read_parity(v, rsb, block_offset, &offset,
+					      par_buf_offset, &buf, bio_prio(bio));
 			if (IS_ERR(par))
 				return PTR_ERR(par);
 		}
@@ -724,10 +728,7 @@ int verity_fec_ctr(struct dm_verity *v)
 		return -E2BIG;
 	}
 
-	if ((f->roots << SECTOR_SHIFT) & ((1 << v->data_dev_block_bits) - 1))
-		f->io_size = 1 << v->data_dev_block_bits;
-	else
-		f->io_size = v->fec->roots << SECTOR_SHIFT;
+	f->io_size = 1 << v->data_dev_block_bits;
 
 	f->bufio = dm_bufio_client_create(f->dev->bdev,
 					  f->io_size,
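Because io_size is now always one data block, the roots parity bytes for an RS block can straddle two bufio reads; par_buf is a stack staging area that carries the leading fragment of a split group into the next read. The carry logic, reduced to a self-contained sketch (not the driver code):

/* Sketch: copy one group of `roots` parity bytes out of `buf`,
 * resuming `carried` staged bytes from the previous buffer, and
 * return how many bytes of the next group had to be staged because
 * they cross the end of this buffer. */
static unsigned int take_parity(const u8 *buf, unsigned int buf_size,
				unsigned int roots, unsigned int *offset,
				u16 *par_buf, unsigned int carried)
{
	unsigned int j;

	for (j = 0; j < roots - carried; j++)
		par_buf[carried + j] = buf[*offset + j];
	*offset += roots - carried;

	/* Stage the head of the next group if it straddles the seam. */
	if (*offset < buf_size && *offset + roots > buf_size) {
		carried = buf_size - *offset;
		for (j = 0; j < carried; j++)
			par_buf[j] = buf[*offset + j];
		*offset = buf_size;
		return carried;
	}

	return 0;
}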
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index c142ec5458b7..47d595f6a76e 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -93,7 +93,7 @@ static void dm_bufio_alloc_callback(struct dm_buffer *buf)
 */
 static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
 {
-	return v->data_start + dm_target_offset(v->ti, bi_sector);
+	return dm_target_offset(v->ti, bi_sector);
 }
 
 /*
@@ -952,7 +952,7 @@ static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
 
 	*bdev = v->data_dev->bdev;
 
-	if (v->data_start || ti->len != bdev_nr_sectors(v->data_dev->bdev))
+	if (ti->len != bdev_nr_sectors(v->data_dev->bdev))
 		return 1;
 	return 0;
 }
@@ -962,7 +962,7 @@ static int verity_iterate_devices(struct dm_target *ti,
 {
 	struct dm_verity *v = ti->private;
 
-	return fn(ti, v->data_dev, v->data_start, ti->len, data);
+	return fn(ti, v->data_dev, 0, ti->len, data);
 }
 
 static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index c996140bda94..8cbb57862ae1 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -50,7 +50,6 @@ struct dm_verity {
 	unsigned int sig_size;	/* root digest signature size */
#endif /* CONFIG_SECURITY */
 	unsigned int salt_size;
-	sector_t data_start;	/* data offset in 512-byte sectors */
 	sector_t hash_start;	/* hash start in blocks */
 	sector_t data_blocks;	/* the number of data blocks */
 	sector_t hash_blocks;	/* the number of hash blocks */
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index c0d41c36e06e..20edd3fabbab 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -344,7 +344,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
 		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
 	} else {
 		set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
-		lim->max_zone_append_sectors = 0;
+		lim->max_hw_zone_append_sectors = 0;
 	}
 
 	/*
@@ -379,7 +379,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
 	if (!zlim.mapped_nr_seq_zones) {
 		lim->max_open_zones = 0;
 		lim->max_active_zones = 0;
-		lim->max_zone_append_sectors = 0;
+		lim->max_hw_zone_append_sectors = 0;
 		lim->zone_write_granularity = 0;
 		lim->chunk_sectors = 0;
 		lim->features &= ~BLK_FEAT_ZONED;
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 8156881a31de..deff22ecccbb 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -245,11 +245,6 @@ unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd)
 	return zmd->zone_nr_blocks;
 }
 
-unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd)
-{
-	return zmd->zone_nr_blocks_shift;
-}
-
 unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd)
 {
 	return zmd->zone_nr_sectors;
@@ -3005,48 +3000,3 @@ void dmz_dtr_metadata(struct dmz_metadata *zmd)
 	dmz_cleanup_metadata(zmd);
 	kfree(zmd);
 }
-
-/*
- * Check zone information on resume.
- */
-int dmz_resume_metadata(struct dmz_metadata *zmd)
-{
-	struct dm_zone *zone;
-	sector_t wp_block;
-	unsigned int i;
-	int ret;
-
-	/* Check zones */
-	for (i = 0; i < zmd->nr_zones; i++) {
-		zone = dmz_get(zmd, i);
-		if (!zone) {
-			dmz_zmd_err(zmd, "Unable to get zone %u", i);
-			return -EIO;
-		}
-		wp_block = zone->wp_block;
-
-		ret = dmz_update_zone(zmd, zone);
-		if (ret) {
-			dmz_zmd_err(zmd, "Broken zone %u", i);
-			return ret;
-		}
-
-		if (dmz_is_offline(zone)) {
-			dmz_zmd_warn(zmd, "Zone %u is offline", i);
-			continue;
-		}
-
-		/* Check write pointer */
-		if (!dmz_is_seq(zone))
-			zone->wp_block = 0;
-		else if (zone->wp_block != wp_block) {
-			dmz_zmd_err(zmd, "Zone %u: Invalid wp (%llu / %llu)",
-				    i, (u64)zone->wp_block, (u64)wp_block);
-			zone->wp_block = wp_block;
-			dmz_invalidate_blocks(zmd, zone, zone->wp_block,
-					      zmd->zone_nr_blocks - zone->wp_block);
-		}
-	}
-
-	return 0;
-}
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index d58db9a27e6c..76e2c6868548 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -76,9 +76,9 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
 	 * pointer and the requested position.
 	 */
 	nr_blocks = block - wp_block;
-	ret = blkdev_issue_zeroout(dev->bdev,
-				   dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
-				   dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
+	ret = blk_zone_issue_zeroout(dev->bdev,
+				     dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
+				     dmz_blk2sect(nr_blocks), GFP_NOIO);
 	if (ret) {
 		dmz_dev_err(dev,
 			    "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 265494d3f711..59ba0aaa9531 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -192,7 +192,6 @@ enum {
 int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
 		     struct dmz_metadata **zmd, const char *devname);
 void dmz_dtr_metadata(struct dmz_metadata *zmd);
-int dmz_resume_metadata(struct dmz_metadata *zmd);
 
 void dmz_lock_map(struct dmz_metadata *zmd);
 void dmz_unlock_map(struct dmz_metadata *zmd);
@@ -230,7 +229,6 @@ unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd, int idx);
 unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd, int idx);
 unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd, int idx);
 unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd);
-unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd);
 unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd);
 unsigned int dmz_zone_nr_sectors_shift(struct dmz_metadata *zmd);
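dm-zoned's write-pointer alignment now calls blk_zone_issue_zeroout() instead of plain blkdev_issue_zeroout(); note the zoned variant takes no flags argument (its signature is visible in the hunk above). A hypothetical usage sketch:

/* Hypothetical helper: pad a sequential zone with zeroes from the
 * current write pointer up to `target`, advancing the WP as a side
 * effect of the writes. */
static int pad_zone_to(struct block_device *bdev, sector_t wp, sector_t target)
{
	if (target <= wp)
		return 0;

	return blk_zone_issue_zeroout(bdev, wp, target - wp, GFP_NOIO);
}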
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 19230404d8c2..12ecf07a3841 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2517,12 +2517,6 @@ void dm_unlock_md_type(struct mapped_device *md)
 	mutex_unlock(&md->type_lock);
 }
 
-void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
-{
-	BUG_ON(!mutex_is_locked(&md->type_lock));
-	md->type = type;
-}
-
 enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
 {
 	return md->type;
@@ -3349,6 +3343,59 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
 	kfree(pools);
 }
 
+struct dm_blkdev_id {
+	u8 *id;
+	enum blk_unique_id type;
+};
+
+static int __dm_get_unique_id(struct dm_target *ti, struct dm_dev *dev,
+			      sector_t start, sector_t len, void *data)
+{
+	struct dm_blkdev_id *dm_id = data;
+	const struct block_device_operations *fops = dev->bdev->bd_disk->fops;
+
+	if (!fops->get_unique_id)
+		return 0;
+
+	return fops->get_unique_id(dev->bdev->bd_disk, dm_id->id, dm_id->type);
+}
+
+/*
+ * Allow access to get_unique_id() for the first device returning a
+ * non-zero result.  Reasonable use expects all devices to have the
+ * same unique id.
+ */
+static int dm_blk_get_unique_id(struct gendisk *disk, u8 *id,
+				enum blk_unique_id type)
+{
+	struct mapped_device *md = disk->private_data;
+	struct dm_table *table;
+	struct dm_target *ti;
+	int ret = 0, srcu_idx;
+
+	struct dm_blkdev_id dm_id = {
+		.id = id,
+		.type = type,
+	};
+
+	table = dm_get_live_table(md, &srcu_idx);
+	if (!table || !dm_table_get_size(table))
+		goto out;
+
+	/* We only support devices that have a single target */
+	if (table->num_targets != 1)
+		goto out;
+	ti = dm_table_get_target(table, 0);
+
+	if (!ti->type->iterate_devices)
+		goto out;
+
+	ret = ti->type->iterate_devices(ti, __dm_get_unique_id, &dm_id);
+out:
+	dm_put_live_table(md, srcu_idx);
+	return ret;
+}
+
 struct dm_pr {
 	u64 old_key;
 	u64 new_key;
@@ -3674,6 +3721,7 @@ static const struct block_device_operations dm_blk_dops = {
 	.ioctl = dm_blk_ioctl,
 	.getgeo = dm_blk_getgeo,
 	.report_zones = dm_blk_report_zones,
+	.get_unique_id = dm_blk_get_unique_id,
 	.pr_ops = &dm_pr_ops,
 	.owner = THIS_MODULE
 };
@@ -3683,6 +3731,7 @@ static const struct block_device_operations dm_rq_blk_dops = {
 	.release = dm_blk_close,
 	.ioctl = dm_blk_ioctl,
 	.getgeo = dm_blk_getgeo,
+	.get_unique_id = dm_blk_get_unique_id,
 	.pr_ops = &dm_pr_ops,
 	.owner = THIS_MODULE
 };
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 8ad782249af8..a0a8ff119815 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -71,12 +71,10 @@ enum dm_queue_mode dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
 struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
-bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
-void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
 enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
 
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
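dm_blk_get_unique_id() leans on the target's iterate_devices hook: the callout runs once per underlying device, and iteration stops at the first non-zero return value, which is then handed back to the block layer. A sketch of that contract with a trivial callout (hypothetical names):

/* Sketch: count a target's underlying devices via iterate_devices.
 * Returning 0 keeps the iteration going; a non-zero return would
 * stop it and be propagated, which is how __dm_get_unique_id above
 * short-circuits on the first device that reports an ID. */
static int count_dev_fn(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	unsigned int *count = data;

	(*count)++;
	return 0;
}

/* usage:
 *	unsigned int n = 0;
 *	if (ti->type->iterate_devices)
 *		ti->type->iterate_devices(ti, count_dev_fn, &n);
 */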
This will be probably solved in the mdadm, but it is safer to + * avoid it. + */ + if (test_bit(Faulty, &rdev->flags)) + return 1; + if (is_new) s += rdev->new_data_offset; else diff --git a/drivers/md/md.h b/drivers/md/md.h index 5d2e6bd58e4d..4ba93af36126 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -1002,6 +1002,30 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio, trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector); } +static inline bool rdev_blocked(struct md_rdev *rdev) +{ + /* + * Blocked will be set by error handler and cleared by daemon after + * updating superblock, meanwhile write IO should be blocked to prevent + * reading old data after power failure. + */ + if (test_bit(Blocked, &rdev->flags)) + return true; + + /* + * Faulty device should not be accessed anymore, there is no need to + * wait for bad block to be acknowledged. + */ + if (test_bit(Faulty, &rdev->flags)) + return false; + + /* rdev is blocked by badblocks. */ + if (test_bit(BlockedBadBlocks, &rdev->flags)) + return true; + + return false; +} + #define mddev_add_trace_msg(mddev, fmt, args...) \ do { \ if (!mddev_is_dm(mddev)) \ diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 157c9bd2fed7..8f8792e55806 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -917,23 +917,27 @@ static int load_ablock(struct dm_array_cursor *c) if (c->block) unlock_ablock(c->info, c->block); - c->block = NULL; - c->ab = NULL; c->index = 0; r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le); if (r) { DMERR("dm_btree_cursor_get_value failed"); - dm_btree_cursor_end(&c->cursor); + goto out; } else { r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab); if (r) { DMERR("get_ablock failed"); - dm_btree_cursor_end(&c->cursor); + goto out; } } + return 0; + +out: + dm_btree_cursor_end(&c->cursor); + c->block = NULL; + c->ab = NULL; return r; } @@ -956,10 +960,10 @@ EXPORT_SYMBOL_GPL(dm_array_cursor_begin); void dm_array_cursor_end(struct dm_array_cursor *c) { - if (c->block) { + if (c->block) unlock_ablock(c->info, c->block); - dm_btree_cursor_end(&c->cursor); - } + + dm_btree_cursor_end(&c->cursor); } EXPORT_SYMBOL_GPL(dm_array_cursor_end); @@ -999,6 +1003,7 @@ int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count) } count -= remaining; + c->index += (remaining - 1); r = dm_array_cursor_next(c); } while (!r); diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 3a19124ee279..22a551c407da 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -51,7 +51,7 @@ static int index_check(const struct dm_block_validator *v, block_size - sizeof(__le32), INDEX_CSUM_XOR)); if (csum_disk != mi_le->csum) { - DMERR_LIMIT("i%s failed: csum %u != wanted %u", __func__, + DMERR_LIMIT("%s failed: csum %u != wanted %u", __func__, le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); return -EILSEQ; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 32d587524778..7049ec7fb8eb 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -384,6 +384,7 @@ static int raid0_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * mddev->raid_disks; + lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) { 
 		queue_limits_cancel_update(mddev->gendisk->queue);
@@ -466,6 +467,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 		struct bio *split = bio_split(bio,
 			zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
 			&mddev->bio_set);
+
+		if (IS_ERR(split)) {
+			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+			bio_endio(bio);
+			return;
+		}
 		bio_chain(split, bio);
 		submit_bio_noacct(bio);
 		bio = split;
@@ -608,6 +615,12 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 	if (sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, sectors, GFP_NOIO,
 					      &mddev->bio_set);
+
+		if (IS_ERR(split)) {
+			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+			bio_endio(bio);
+			return true;
+		}
 		bio_chain(split, bio);
 		raid0_map_submit_bio(mddev, bio);
 		bio = split;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6c9d24203f39..519c56f0ee3d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1322,7 +1322,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	const enum req_op op = bio_op(bio);
 	const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
 	int max_sectors;
-	int rdisk;
+	int rdisk, error;
 	bool r1bio_existed = !!r1_bio;
 
 	/*
@@ -1383,6 +1383,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
 					      gfp, &conf->bio_split);
+
+		if (IS_ERR(split)) {
+			error = PTR_ERR(split);
+			goto err_handle;
+		}
 		bio_chain(split, bio);
 		submit_bio_noacct(bio);
 		bio = split;
@@ -1410,6 +1415,47 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	read_bio->bi_private = r1_bio;
 	mddev_trace_remap(mddev, read_bio, r1_bio->sector);
 	submit_bio_noacct(read_bio);
+	return;
+
+err_handle:
+	atomic_dec(&mirror->rdev->nr_pending);
+	bio->bi_status = errno_to_blk_status(error);
+	set_bit(R1BIO_Uptodate, &r1_bio->state);
+	raid_end_bio_io(r1_bio);
+}
+
+static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
+{
+	struct r1conf *conf = mddev->private;
+	int disks = conf->raid_disks * 2;
+	int i;
+
+retry:
+	for (i = 0; i < disks; i++) {
+		struct md_rdev *rdev = conf->mirrors[i].rdev;
+
+		if (!rdev)
+			continue;
+
+		/* don't write here until the bad block is acknowledged */
+		if (test_bit(WriteErrorSeen, &rdev->flags) &&
+		    rdev_has_badblock(rdev, bio->bi_iter.bi_sector,
+				      bio_sectors(bio)) < 0)
+			set_bit(BlockedBadBlocks, &rdev->flags);
+
+		if (rdev_blocked(rdev)) {
+			if (bio->bi_opf & REQ_NOWAIT)
+				return false;
+
+			mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
+					    rdev->raid_disk);
+			atomic_inc(&rdev->nr_pending);
+			md_wait_for_blocked_rdev(rdev, rdev->mddev);
+			goto retry;
+		}
+	}
+
+	return true;
+}
 
 static void raid1_write_request(struct mddev *mddev, struct bio *bio,
@@ -1417,9 +1463,8 @@
 {
 	struct r1conf *conf = mddev->private;
 	struct r1bio *r1_bio;
-	int i, disks;
+	int i, disks, k, error;
 	unsigned long flags;
-	struct md_rdev *blocked_rdev;
 	int first_clone;
 	int max_sectors;
 	bool write_behind = false;
@@ -1457,7 +1502,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		return;
 	}
 
-retry_write:
+	if (!wait_blocked_rdev(mddev, bio)) {
+		bio_wouldblock_error(bio);
+		return;
+	}
+
 	r1_bio = alloc_r1bio(mddev, bio);
 	r1_bio->sectors = max_write_sectors;
 
@@ -1473,7 +1522,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	 */
 
 	disks = conf->raid_disks * 2;
-	blocked_rdev = NULL;
 	max_sectors = r1_bio->sectors;
 	for (i = 0; i < disks; i++) {
 		struct md_rdev *rdev = conf->mirrors[i].rdev;
@@ -1486,11 +1534,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags))
 			write_behind = true;
 
-		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-			atomic_inc(&rdev->nr_pending);
-			blocked_rdev = rdev;
-			break;
-		}
 		r1_bio->bios[i] = NULL;
 		if (!rdev || test_bit(Faulty, &rdev->flags)) {
 			if (i < conf->raid_disks)
@@ -1506,13 +1549,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
 			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
 					     &first_bad, &bad_sectors);
-			if (is_bad < 0) {
-				/* mustn't write here until the bad block is
-				 * acknowledged*/
-				set_bit(BlockedBadBlocks, &rdev->flags);
-				blocked_rdev = rdev;
-				break;
-			}
 			if (is_bad && first_bad <= r1_bio->sector) {
 				/* Cannot write here at all */
 				bad_sectors -= (r1_bio->sector - first_bad);
@@ -1535,7 +1571,21 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 				continue;
 			}
 			if (is_bad) {
-				int good_sectors = first_bad - r1_bio->sector;
+				int good_sectors;
+
+				/*
+				 * We cannot atomically write this, so just
+				 * error in that case. It could be possible to
+				 * atomically write other mirrors, but the
+				 * complexity of supporting that is not worth
+				 * the benefit.
+				 */
+				if (bio->bi_opf & REQ_ATOMIC) {
+					error = -EIO;
+					goto err_handle;
+				}
+
+				good_sectors = first_bad - r1_bio->sector;
 				if (good_sectors < max_sectors)
 					max_sectors = good_sectors;
 			}
@@ -1543,27 +1593,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->bios[i] = bio;
 	}
 
-	if (unlikely(blocked_rdev)) {
-		/* Wait for this device to become unblocked */
-		int j;
-
-		for (j = 0; j < i; j++)
-			if (r1_bio->bios[j])
-				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-		mempool_free(r1_bio, &conf->r1bio_pool);
-		allow_barrier(conf, bio->bi_iter.bi_sector);
-
-		if (bio->bi_opf & REQ_NOWAIT) {
-			bio_wouldblock_error(bio);
-			return;
-		}
-		mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
-				    blocked_rdev->raid_disk);
-		md_wait_for_blocked_rdev(blocked_rdev, mddev);
-		wait_barrier(conf, bio->bi_iter.bi_sector, false);
-		goto retry_write;
-	}
-
 	/*
 	 * When using a bitmap, we may call alloc_behind_master_bio below.
 	 * alloc_behind_master_bio allocates a copy of the data payload a page
@@ -1576,6 +1605,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
 					      GFP_NOIO, &conf->bio_split);
+
+		if (IS_ERR(split)) {
+			error = PTR_ERR(split);
+			goto err_handle;
+		}
 		bio_chain(split, bio);
 		submit_bio_noacct(bio);
 		bio = split;
@@ -1637,7 +1671,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
 		mbio->bi_iter.bi_sector	= (r1_bio->sector + rdev->data_offset);
 		mbio->bi_end_io	= raid1_end_write_request;
-		mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA));
+		mbio->bi_opf = bio_op(bio) |
+			(bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC));
 		if (test_bit(FailFast, &rdev->flags) &&
 		    !test_bit(WriteMostly, &rdev->flags) &&
 		    conf->raid_disks - mddev->degraded > 1)
@@ -1660,6 +1695,18 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
 	/* In case raid1d snuck in to freeze_array */
 	wake_up_barrier(conf);
+	return;
+
+err_handle:
+	for (k = 0; k < i; k++) {
+		if (r1_bio->bios[k]) {
+			rdev_dec_pending(conf->mirrors[k].rdev, mddev);
+			r1_bio->bios[k] = NULL;
+		}
+	}
+
+	bio->bi_status = errno_to_blk_status(error);
+	set_bit(R1BIO_Uptodate, &r1_bio->state);
+	raid_end_bio_io(r1_bio);
 }
 
 static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
@@ -3192,6 +3239,7 @@ static int raid1_set_limits(struct mddev *mddev)
 
 	md_init_stacking_limits(&lim);
 	lim.max_write_zeroes_sectors = 0;
+	lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err) {
 		queue_limits_cancel_update(mddev->gendisk->queue);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 862b1fb71d86..7d7a8a2524dc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	int slot = r10_bio->read_slot;
 	struct md_rdev *err_rdev = NULL;
 	gfp_t gfp = GFP_NOIO;
+	int error;
 
 	if (slot >= 0 && r10_bio->devs[slot].rdev) {
 		/*
@@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
 					      gfp, &conf->bio_split);
+		if (IS_ERR(split)) {
+			error = PTR_ERR(split);
+			goto err_handle;
+		}
 		bio_chain(split, bio);
 		allow_barrier(conf);
 		submit_bio_noacct(bio);
@@ -1236,6 +1241,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	mddev_trace_remap(mddev, read_bio, r10_bio->sector);
 	submit_bio_noacct(read_bio);
 	return;
+err_handle:
+	atomic_dec(&rdev->nr_pending);
+	bio->bi_status = errno_to_blk_status(error);
+	set_bit(R10BIO_Uptodate, &r10_bio->state);
+	raid_end_bio_io(r10_bio);
 }
 
 static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
@@ -1245,6 +1255,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	const enum req_op op = bio_op(bio);
 	const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
 	const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
+	const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC;
 	unsigned long flags;
 	struct r10conf *conf = mddev->private;
 	struct md_rdev *rdev;
@@ -1263,7 +1274,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	mbio->bi_iter.bi_sector	= (r10_bio->devs[n_copy].addr +
 				   choose_data_offset(r10_bio, rdev));
 	mbio->bi_end_io	= raid10_end_write_request;
-	mbio->bi_opf = op | do_sync | do_fua;
+	mbio->bi_opf = op | do_sync | do_fua | do_atomic;
 	if (!replacement && test_bit(FailFast,
 				     &conf->mirrors[devnum].rdev->flags)
 	    && enough(conf, devnum))
@@ -1285,9 +1296,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 
 static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
 {
-	int i;
 	struct r10conf *conf = mddev->private;
 	struct md_rdev *blocked_rdev;
+	int i;
 
 retry_wait:
 	blocked_rdev = NULL;
@@ -1295,40 +1306,36 @@ retry_wait:
 		struct md_rdev *rdev, *rrdev;
 
 		rdev = conf->mirrors[i].rdev;
-		rrdev = conf->mirrors[i].replacement;
-		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-			atomic_inc(&rdev->nr_pending);
-			blocked_rdev = rdev;
-			break;
-		}
-		if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
-			atomic_inc(&rrdev->nr_pending);
-			blocked_rdev = rrdev;
-			break;
-		}
-
-		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
+		if (rdev) {
 			sector_t dev_sector = r10_bio->devs[i].addr;
 
 			/*
			 * A discard request doesn't care about the write
			 * result, so it doesn't need to wait for a blocked
			 * disk here.
 			 */
-			if (!r10_bio->sectors)
-				continue;
-
-			if (rdev_has_badblock(rdev, dev_sector,
-					      r10_bio->sectors) < 0) {
+			if (test_bit(WriteErrorSeen, &rdev->flags) &&
+			    r10_bio->sectors &&
+			    rdev_has_badblock(rdev, dev_sector,
+					      r10_bio->sectors) < 0)
 				/*
-				 * Mustn't write here until the bad block
-				 * is acknowledged
+				 * Mustn't write here until the bad
+				 * block is acknowledged
 				 */
-				atomic_inc(&rdev->nr_pending);
 				set_bit(BlockedBadBlocks, &rdev->flags);
+
+			if (rdev_blocked(rdev)) {
 				blocked_rdev = rdev;
+				atomic_inc(&rdev->nr_pending);
 				break;
 			}
 		}
+
+		rrdev = conf->mirrors[i].replacement;
+		if (rrdev && rdev_blocked(rrdev)) {
+			atomic_inc(&rrdev->nr_pending);
+			blocked_rdev = rrdev;
+			break;
+		}
 	}
 
 	if (unlikely(blocked_rdev)) {
@@ -1347,9 +1354,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 				 struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	int i;
+	int i, k;
 	sector_t sectors;
 	int max_sectors;
+	int error;
 
 	if ((mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1461,7 +1469,21 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 				continue;
 			}
 			if (is_bad) {
-				int good_sectors = first_bad - dev_sector;
+				int good_sectors;
+
+				/*
+				 * We cannot atomically write this, so just
+				 * error in that case. It could be possible to
+				 * atomically write other mirrors, but the
+				 * complexity of supporting that is not worth
+				 * the benefit.
+				 */
+				if (bio->bi_opf & REQ_ATOMIC) {
+					error = -EIO;
+					goto err_handle;
+				}
+
+				good_sectors = first_bad - dev_sector;
 				if (good_sectors < max_sectors)
 					max_sectors = good_sectors;
 			}
@@ -1482,6 +1504,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 	if (r10_bio->sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, r10_bio->sectors,
 					      GFP_NOIO, &conf->bio_split);
+		if (IS_ERR(split)) {
+			error = PTR_ERR(split);
+			goto err_handle;
+		}
 		bio_chain(split, bio);
 		allow_barrier(conf);
 		submit_bio_noacct(bio);
@@ -1503,6 +1529,26 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 			raid10_write_one_disk(mddev, r10_bio, bio, true, i);
 	}
 	one_write_done(r10_bio);
+	return;
+err_handle:
+	for (k = 0; k < i; k++) {
+		int d = r10_bio->devs[k].devnum;
+		struct md_rdev *rdev = conf->mirrors[d].rdev;
+		struct md_rdev *rrdev = conf->mirrors[d].replacement;
+
+		if (r10_bio->devs[k].bio) {
+			rdev_dec_pending(rdev, mddev);
+			r10_bio->devs[k].bio = NULL;
+		}
+		if (r10_bio->devs[k].repl_bio) {
+			rdev_dec_pending(rrdev, mddev);
+			r10_bio->devs[k].repl_bio = NULL;
+		}
+	}
+
+	bio->bi_status = errno_to_blk_status(error);
+	set_bit(R10BIO_Uptodate, &r10_bio->state);
+	raid_end_bio_io(r10_bio);
 }
 
 static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
@@ -1644,6 +1690,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 		if (remainder) {
 			split_size = stripe_size - remainder;
 			split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+			if (IS_ERR(split)) {
+				bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+				bio_endio(bio);
+				return 0;
+			}
 			bio_chain(split, bio);
 			allow_barrier(conf);
 			/* Resend the first split part */
@@ -1654,6 +1705,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 		if (remainder) {
 			split_size = bio_sectors(bio) - remainder;
 			split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+			if (IS_ERR(split)) {
+				bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+				bio_endio(bio);
+				return 0;
+			}
 			bio_chain(split, bio);
 			allow_barrier(conf);
 			/* Resend the second split part */
@@ -3984,6 +4040,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
 	lim.max_write_zeroes_sectors = 0;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
+	lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err) {
 		queue_limits_cancel_update(mddev->gendisk->queue);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index a70cbec12ed0..37c4da5311ca 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -258,7 +258,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
 	pplhdr->signature = cpu_to_le32(ppl_conf->signature);
 
-	io->seq = atomic64_add_return(1, &ppl_conf->seq);
+	io->seq = atomic64_inc_return(&ppl_conf->seq);
 	pplhdr->generation = cpu_to_le64(io->seq);
 
 	return io;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dc2ea636d173..f09e7677ee9f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4724,14 +4724,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 		if (rdev) {
 			is_bad = rdev_has_badblock(rdev, sh->sector,
 						   RAID5_STRIPE_SECTORS(conf));
-			if (s->blocked_rdev == NULL
-			    && (test_bit(Blocked, &rdev->flags)
-				|| is_bad < 0)) {
+			if (s->blocked_rdev == NULL) {
 				if (is_bad < 0)
-					set_bit(BlockedBadBlocks,
-						&rdev->flags);
-				s->blocked_rdev = rdev;
-				atomic_inc(&rdev->nr_pending);
+					set_bit(BlockedBadBlocks, &rdev->flags);
+				if (rdev_blocked(rdev)) {
+					s->blocked_rdev = rdev;
+					atomic_inc(&rdev->nr_pending);
+				}
 			}
 		}
 		clear_bit(R5_Insync, &dev->flags);
@@ -7177,6 +7176,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 	err = mddev_suspend_and_lock(mddev);
 	if (err)
 		return err;
+	raid5_quiesce(mddev, true);
+
 	conf = mddev->private;
 	if (!conf)
 		err = -ENODEV;
@@ -7198,6 +7199,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 			kfree(old_groups);
 		}
 	}
+
+	raid5_quiesce(mddev, false);
 	mddev_unlock_and_resume(mddev);
 
 	return err ?: len;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 896ecfc4afa6..d174e586698f 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -633,7 +633,7 @@ struct r5conf {
 					 * two caches.
 					 */
 	int			active_name;
-	char			cache_name[2][32];
+	char			cache_name[2][48];
 	struct kmem_cache	*slab_cache; /* for allocating stripes */
 	struct mutex		cache_size_mutex; /* Protect changes to cache size */
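
A note on the atomic-write plumbing above: raid0, raid1 and raid10 now set BLK_FEAT_ATOMIC_WRITES_STACKED in their queue limits, and the raid1/raid10 write paths pass REQ_ATOMIC through to member devices, failing the whole bio with -EIO rather than partially degrading an atomic write that overlaps an unacknowledged bad block. The sketch below is illustrative only and not part of the diff: it assumes an md array at /dev/md0 that advertises atomic writes, and a 4 KiB write that fits within the unit min/max that statx(2) reports when STATX_WRITE_ATOMIC is requested.

/*
 * Minimal userspace sketch (assumptions: /dev/md0 exists, 4 KiB is a
 * valid atomic write unit for this array). Atomic writes require
 * direct IO, hence O_DIRECT and the aligned buffer.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

#ifndef RWF_ATOMIC		/* from uapi linux/fs.h, Linux >= 6.11 headers */
#define RWF_ATOMIC	0x00000040
#endif

int main(void)
{
	struct iovec iov;
	void *buf;
	int fd;

	fd = open("/dev/md0", O_WRONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* One unit-sized, unit-aligned buffer, written all-or-nothing. */
	if (posix_memalign(&buf, 4096, 4096)) {
		close(fd);
		return 1;
	}
	memset(buf, 0xab, 4096);
	iov.iov_base = buf;
	iov.iov_len = 4096;

	/* The kernel rejects sizes/alignments outside the advertised limits. */
	if (pwritev2(fd, &iov, 1, 0, RWF_ATOMIC) < 0)
		perror("pwritev2(RWF_ATOMIC)");

	free(buf);
	close(fd);
	return 0;
}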