diff options
Diffstat (limited to 'drivers/md/dm-mpath.c')
| -rw-r--r-- | drivers/md/dm-mpath.c | 416 |
1 files changed, 146 insertions, 270 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3570bcb7a4a4..0e8ab5bb3575 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -90,13 +90,7 @@ struct multipath { atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ atomic_t pg_init_count; /* Number of times pg_init called */ - unsigned queue_mode; - - /* - * We must use a mempool of dm_mpath_io structs so that we - * can resubmit bios on error. - */ - mempool_t *mpio_pool; + enum dm_queue_mode queue_mode; struct mutex work_mutex; struct work_struct trigger_event; @@ -115,11 +109,10 @@ struct dm_mpath_io { typedef int (*action_fn) (struct pgpath *pgpath); -static struct kmem_cache *_mpio_cache; - static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void trigger_event(struct work_struct *work); -static void activate_path(struct work_struct *work); +static void activate_or_offline_path(struct pgpath *pgpath); +static void activate_path_work(struct work_struct *work); static void process_queued_bios(struct work_struct *work); /*----------------------------------------------- @@ -144,7 +137,7 @@ static struct pgpath *alloc_pgpath(void) if (pgpath) { pgpath->is_active = true; - INIT_DELAYED_WORK(&pgpath->activate_path, activate_path); + INIT_DELAYED_WORK(&pgpath->activate_path, activate_path_work); } return pgpath; @@ -209,7 +202,6 @@ static struct multipath *alloc_multipath(struct dm_target *ti) init_waitqueue_head(&m->pg_init_wait); mutex_init(&m->work_mutex); - m->mpio_pool = NULL; m->queue_mode = DM_TYPE_NONE; m->ti = ti; @@ -229,16 +221,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; else m->queue_mode = DM_TYPE_REQUEST_BASED; - } - - if (m->queue_mode == DM_TYPE_REQUEST_BASED) { - unsigned min_ios = dm_get_reserved_rq_based_ios(); - - m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); - if (!m->mpio_pool) - return -ENOMEM; - } - else if (m->queue_mode == DM_TYPE_BIO_BASED) { + } else if (m->queue_mode == DM_TYPE_BIO_BASED) { INIT_WORK(&m->process_queued_bios, process_queued_bios); /* * bio-based doesn't support any direct scsi_dh management; @@ -263,7 +246,6 @@ static void free_multipath(struct multipath *m) kfree(m->hw_handler_name); kfree(m->hw_handler_params); - mempool_destroy(m->mpio_pool); kfree(m); } @@ -272,38 +254,6 @@ static struct dm_mpath_io *get_mpio(union map_info *info) return info->ptr; } -static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info) -{ - struct dm_mpath_io *mpio; - - if (!m->mpio_pool) { - /* Use blk-mq pdu memory requested via per_io_data_size */ - mpio = get_mpio(info); - memset(mpio, 0, sizeof(*mpio)); - return mpio; - } - - mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC); - if (!mpio) - return NULL; - - memset(mpio, 0, sizeof(*mpio)); - info->ptr = mpio; - - return mpio; -} - -static void clear_request_fn_mpio(struct multipath *m, union map_info *info) -{ - /* Only needed for non blk-mq (.request_fn) multipath */ - if (m->mpio_pool) { - struct dm_mpath_io *mpio = info->ptr; - - info->ptr = NULL; - mempool_free(mpio, m->mpio_pool); - } -} - static size_t multipath_per_bio_data_size(void) { return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details); @@ -348,6 +298,8 @@ static int __pg_init_all_paths(struct multipath *m) struct pgpath *pgpath; unsigned long pg_init_delay = 0; + lockdep_assert_held(&m->lock); + if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) return 0; @@ -372,13 +324,16 @@ static int __pg_init_all_paths(struct multipath *m) return atomic_read(&m->pg_init_in_progress); } -static void pg_init_all_paths(struct multipath *m) +static int pg_init_all_paths(struct multipath *m) { + int ret; unsigned long flags; spin_lock_irqsave(&m->lock, flags); - __pg_init_all_paths(m); + ret = __pg_init_all_paths(m); spin_unlock_irqrestore(&m->lock, flags); + + return ret; } static void __switch_pg(struct multipath *m, struct priority_group *pg) @@ -487,59 +442,35 @@ failed: } /* - * Check whether bios must be queued in the device-mapper core rather - * than here in the target. - * - * If m->queue_if_no_path and m->saved_queue_if_no_path hold the - * same value then we are not between multipath_presuspend() - * and multipath_resume() calls and we have no need to check - * for the DMF_NOFLUSH_SUSPENDING flag. + * dm_report_EIO() is a macro instead of a function to make pr_debug() + * report the function name and line number of the function from which + * it has been invoked. */ -static bool __must_push_back(struct multipath *m) -{ - return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) != - test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) && - dm_noflush_suspending(m->ti)); -} - -static bool must_push_back_rq(struct multipath *m) -{ - bool r; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - r = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || - __must_push_back(m)); - spin_unlock_irqrestore(&m->lock, flags); - - return r; -} - -static bool must_push_back_bio(struct multipath *m) -{ - bool r; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - r = __must_push_back(m); - spin_unlock_irqrestore(&m->lock, flags); - - return r; -} +#define dm_report_EIO(m) \ +do { \ + struct mapped_device *md = dm_table_get_md((m)->ti->table); \ + \ + pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \ + dm_device_name(md), \ + test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ + dm_noflush_suspending((m)->ti)); \ +} while (0) /* * Map cloned requests (request-based multipath) */ -static int __multipath_map(struct dm_target *ti, struct request *clone, - union map_info *map_context, - struct request *rq, struct request **__clone) +static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, + union map_info *map_context, + struct request **__clone) { struct multipath *m = ti->private; - int r = DM_MAPIO_REQUEUE; - size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq); + size_t nr_bytes = blk_rq_bytes(rq); struct pgpath *pgpath; struct block_device *bdev; - struct dm_mpath_io *mpio; + struct dm_mpath_io *mpio = get_mpio(map_context); + struct request_queue *q; + struct request *clone; /* Do we need to select a new pgpath? */ pgpath = lockless_dereference(m->current_pgpath); @@ -547,51 +478,40 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, pgpath = choose_pgpath(m, nr_bytes); if (!pgpath) { - if (must_push_back_rq(m)) + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_DELAY_REQUEUE; - return -EIO; /* Failed */ + dm_report_EIO(m); /* Failed */ + return DM_MAPIO_KILL; } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { - pg_init_all_paths(m); - return r; + if (pg_init_all_paths(m)) + return DM_MAPIO_DELAY_REQUEUE; + return DM_MAPIO_REQUEUE; } - mpio = set_mpio(m, map_context); - if (!mpio) - /* ENOMEM, requeue */ - return r; - + memset(mpio, 0, sizeof(*mpio)); mpio->pgpath = pgpath; mpio->nr_bytes = nr_bytes; bdev = pgpath->path.dev->bdev; - - if (clone) { - /* - * Old request-based interface: allocated clone is passed in. - * Used by: .request_fn stacked on .request_fn path(s). - */ - clone->q = bdev_get_queue(bdev); - clone->rq_disk = bdev->bd_disk; - clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - } else { - /* - * blk-mq request-based interface; used by both: - * .request_fn stacked on blk-mq path(s) and - * blk-mq stacked on blk-mq path(s). - */ - clone = blk_mq_alloc_request(bdev_get_queue(bdev), - rq_data_dir(rq), BLK_MQ_REQ_NOWAIT); - if (IS_ERR(clone)) { - /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ - clear_request_fn_mpio(m, map_context); - return r; + q = bdev_get_queue(bdev); + clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC); + if (IS_ERR(clone)) { + /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ + bool queue_dying = blk_queue_dying(q); + DMERR_LIMIT("blk_get_request() returned %ld%s - requeuing", + PTR_ERR(clone), queue_dying ? " (path offline)" : ""); + if (queue_dying) { + atomic_inc(&m->pg_init_in_progress); + activate_or_offline_path(pgpath); + return DM_MAPIO_REQUEUE; } - clone->bio = clone->biotail = NULL; - clone->rq_disk = bdev->bd_disk; - clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - *__clone = clone; + return DM_MAPIO_DELAY_REQUEUE; } + clone->bio = clone->biotail = NULL; + clone->rq_disk = bdev->bd_disk; + clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; + *__clone = clone; if (pgpath->pg->ps.type->start_io) pgpath->pg->ps.type->start_io(&pgpath->pg->ps, @@ -600,22 +520,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, return DM_MAPIO_REMAPPED; } -static int multipath_map(struct dm_target *ti, struct request *clone, - union map_info *map_context) -{ - return __multipath_map(ti, clone, map_context, NULL, NULL); -} - -static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, - union map_info *map_context, - struct request **clone) -{ - return __multipath_map(ti, NULL, map_context, rq, clone); -} - static void multipath_release_clone(struct request *clone) { - blk_mq_free_request(clone); + blk_put_request(clone); } /* @@ -649,15 +556,16 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m } if (!pgpath) { - if (!must_push_back_bio(m)) - return -EIO; - return DM_MAPIO_REQUEUE; + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + return DM_MAPIO_REQUEUE; + dm_report_EIO(m); + return DM_MAPIO_KILL; } mpio->pgpath = pgpath; mpio->nr_bytes = nr_bytes; - bio->bi_error = 0; + bio->bi_status = 0; bio->bi_bdev = pgpath->path.dev->bdev; bio->bi_opf |= REQ_FAILFAST_TRANSPORT; @@ -713,15 +621,31 @@ static void process_queued_bios(struct work_struct *work) blk_start_plug(&plug); while ((bio = bio_list_pop(&bios))) { r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio)); - if (r < 0 || r == DM_MAPIO_REQUEUE) { - bio->bi_error = r; + switch (r) { + case DM_MAPIO_KILL: + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + break; + case DM_MAPIO_REQUEUE: + bio->bi_status = BLK_STS_DM_REQUEUE; bio_endio(bio); - } else if (r == DM_MAPIO_REMAPPED) + break; + case DM_MAPIO_REMAPPED: generic_make_request(bio); + break; + } } blk_finish_plug(&plug); } +static void assign_bit(bool value, long nr, unsigned long *addr) +{ + if (value) + set_bit(nr, addr); + else + clear_bit(nr, addr); +} + /* * If we run out of usable paths, should we queue I/O or error it? */ @@ -731,23 +655,11 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, unsigned long flags; spin_lock_irqsave(&m->lock, flags); - - if (save_old_value) { - if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - } else { - if (queue_if_no_path) - set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - } - if (queue_if_no_path) - set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - + assign_bit((save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || + (!save_old_value && queue_if_no_path), + MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); + assign_bit(queue_if_no_path || dm_noflush_suspending(m->ti), + MPATHF_QUEUE_IF_NO_PATH, &m->flags); spin_unlock_irqrestore(&m->lock, flags); if (!queue_if_no_path) { @@ -1185,9 +1097,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; + ti->num_write_zeroes_bios = 1; if (m->queue_mode == DM_TYPE_BIO_BASED) ti->per_io_data_size = multipath_per_bio_data_size(); - else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) + else ti->per_io_data_size = sizeof(struct dm_mpath_io); return 0; @@ -1519,10 +1432,8 @@ out: spin_unlock_irqrestore(&m->lock, flags); } -static void activate_path(struct work_struct *work) +static void activate_or_offline_path(struct pgpath *pgpath) { - struct pgpath *pgpath = - container_of(work, struct pgpath, activate_path.work); struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); if (pgpath->is_active && !blk_queue_dying(q)) @@ -1531,22 +1442,23 @@ static void activate_path(struct work_struct *work) pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); } -static int noretry_error(int error) +static void activate_path_work(struct work_struct *work) +{ + struct pgpath *pgpath = + container_of(work, struct pgpath, activate_path.work); + + activate_or_offline_path(pgpath); +} + +static int noretry_error(blk_status_t error) { switch (error) { - case -EBADE: - /* - * EBADE signals an reservation conflict. - * We shouldn't fail the path here as we can communicate with - * the target. We should failover to the next path, but in - * doing so we might be causing a ping-pong between paths. - * So just return the reservation conflict error. - */ - case -EOPNOTSUPP: - case -EREMOTEIO: - case -EILSEQ: - case -ENODATA: - case -ENOSPC: + case BLK_STS_NOTSUPP: + case BLK_STS_NOSPC: + case BLK_STS_TARGET: + case BLK_STS_NEXUS: + case BLK_STS_MEDIUM: + case BLK_STS_RESOURCE: return 1; } @@ -1554,12 +1466,13 @@ static int noretry_error(int error) return 0; } -/* - * end_io handling - */ -static int do_end_io(struct multipath *m, struct request *clone, - int error, struct dm_mpath_io *mpio) +static int multipath_end_io(struct dm_target *ti, struct request *clone, + blk_status_t error, union map_info *map_context) { + struct dm_mpath_io *mpio = get_mpio(map_context); + struct pgpath *pgpath = mpio->pgpath; + int r = DM_ENDIO_DONE; + /* * We don't queue any clone request inside the multipath target * during end I/O handling, since those clone requests don't have @@ -1571,70 +1484,53 @@ static int do_end_io(struct multipath *m, struct request *clone, * request into dm core, which will remake a clone request and * clone bios for it and resubmit it later. */ - int r = DM_ENDIO_REQUEUE; + if (error && !noretry_error(error)) { + struct multipath *m = ti->private; - if (!error && !clone->errors) - return 0; /* I/O complete */ + r = DM_ENDIO_REQUEUE; - if (noretry_error(error)) - return error; - - if (mpio->pgpath) - fail_path(mpio->pgpath); + if (pgpath) + fail_path(pgpath); - if (!atomic_read(&m->nr_valid_paths)) { - if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (!must_push_back_rq(m)) - r = -EIO; + if (atomic_read(&m->nr_valid_paths) == 0 && + !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + if (error == BLK_STS_IOERR) + dm_report_EIO(m); + /* complete with the original error */ + r = DM_ENDIO_DONE; } } - return r; -} - -static int multipath_end_io(struct dm_target *ti, struct request *clone, - int error, union map_info *map_context) -{ - struct multipath *m = ti->private; - struct dm_mpath_io *mpio = get_mpio(map_context); - struct pgpath *pgpath; - struct path_selector *ps; - int r; - - BUG_ON(!mpio); - - r = do_end_io(m, clone, error, mpio); - pgpath = mpio->pgpath; if (pgpath) { - ps = &pgpath->pg->ps; + struct path_selector *ps = &pgpath->pg->ps; + if (ps->type->end_io) ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); } - clear_request_fn_mpio(m, map_context); return r; } -static int do_end_io_bio(struct multipath *m, struct bio *clone, - int error, struct dm_mpath_io *mpio) +static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, + blk_status_t *error) { + struct multipath *m = ti->private; + struct dm_mpath_io *mpio = get_mpio_from_bio(clone); + struct pgpath *pgpath = mpio->pgpath; unsigned long flags; + int r = DM_ENDIO_DONE; - if (!error) - return 0; /* I/O complete */ - - if (noretry_error(error)) - return error; + if (!*error || noretry_error(*error)) + goto done; - if (mpio->pgpath) - fail_path(mpio->pgpath); + if (pgpath) + fail_path(pgpath); - if (!atomic_read(&m->nr_valid_paths)) { - if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (!must_push_back_bio(m)) - return -EIO; - return DM_ENDIO_REQUEUE; - } + if (atomic_read(&m->nr_valid_paths) == 0 && + !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + dm_report_EIO(m); + *error = BLK_STS_IOERR; + goto done; } /* Queue for the daemon to resubmit */ @@ -1646,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone, if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) queue_work(kmultipathd, &m->process_queued_bios); - return DM_ENDIO_INCOMPLETE; -} - -static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error) -{ - struct multipath *m = ti->private; - struct dm_mpath_io *mpio = get_mpio_from_bio(clone); - struct pgpath *pgpath; - struct path_selector *ps; - int r; - - BUG_ON(!mpio); - - r = do_end_io_bio(m, clone, error, mpio); - pgpath = mpio->pgpath; + r = DM_ENDIO_INCOMPLETE; +done: if (pgpath) { - ps = &pgpath->pg->ps; + struct path_selector *ps = &pgpath->pg->ps; + if (ps->type->end_io) ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); } @@ -1701,10 +1585,8 @@ static void multipath_resume(struct dm_target *ti) unsigned long flags; spin_lock_irqsave(&m->lock, flags); - if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) - set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); + assign_bit(test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags), + MPATHF_QUEUE_IF_NO_PATH, &m->flags); spin_unlock_irqrestore(&m->lock, flags); } @@ -1764,6 +1646,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type, case DM_TYPE_MQ_REQUEST_BASED: DMEMIT("queue_mode mq "); break; + default: + WARN_ON_ONCE(true); + break; } } } @@ -2060,7 +1945,6 @@ static struct target_type multipath_target = { .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, - .map_rq = multipath_map, .clone_and_map_rq = multipath_clone_and_map, .release_clone_rq = multipath_release_clone, .rq_end_io = multipath_end_io, @@ -2080,11 +1964,6 @@ static int __init dm_multipath_init(void) { int r; - /* allocate a slab for the dm_mpath_ios */ - _mpio_cache = KMEM_CACHE(dm_mpath_io, 0); - if (!_mpio_cache) - return -ENOMEM; - r = dm_register_target(&multipath_target); if (r < 0) { DMERR("request-based register failed %d", r); @@ -2120,8 +1999,6 @@ bad_alloc_kmpath_handlerd: bad_alloc_kmultipathd: dm_unregister_target(&multipath_target); bad_register_target: - kmem_cache_destroy(_mpio_cache); - return r; } @@ -2131,7 +2008,6 @@ static void __exit dm_multipath_exit(void) destroy_workqueue(kmultipathd); dm_unregister_target(&multipath_target); - kmem_cache_destroy(_mpio_cache); } module_init(dm_multipath_init); |