-rw-r--r--	drivers/md/raid5.c	231
1 files changed, 133 insertions, 98 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 47b6ff2a7b16..018b9abf524b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5787,17 +5787,139 @@ static bool ahead_of_reshape(struct mddev *mddev, sector_t sector,
 					  sector >= reshape_sector;
 }
 
+enum stripe_result {
+	STRIPE_SUCCESS = 0,
+	STRIPE_RETRY,
+	STRIPE_SCHEDULE_AND_RETRY,
+	STRIPE_FAIL,
+};
+
+struct stripe_request_ctx {
+	/* the request had REQ_PREFLUSH, cleared after the first stripe_head */
+	bool do_flush;
+};
+
+static enum stripe_result make_stripe_request(struct mddev *mddev,
+		struct r5conf *conf, struct stripe_request_ctx *ctx,
+		sector_t logical_sector, struct bio *bi, int seq)
+{
+	const int rw = bio_data_dir(bi);
+	enum stripe_result ret;
+	struct stripe_head *sh;
+	sector_t new_sector;
+	int previous = 0;
+	int dd_idx;
+
+	if (unlikely(conf->reshape_progress != MaxSector)) {
+		/*
+		 * Spinlock is needed as reshape_progress may be
+		 * 64bit on a 32bit platform, and so it might be
+		 * possible to see a half-updated value
+		 * Of course reshape_progress could change after
+		 * the lock is dropped, so once we get a reference
+		 * to the stripe that we think it is, we will have
+		 * to check again.
+		 */
+		spin_lock_irq(&conf->device_lock);
+		if (ahead_of_reshape(mddev, logical_sector,
+				     conf->reshape_progress)) {
+			previous = 1;
+		} else {
+			if (ahead_of_reshape(mddev, logical_sector,
+					     conf->reshape_safe)) {
+				spin_unlock_irq(&conf->device_lock);
+				return STRIPE_SCHEDULE_AND_RETRY;
+			}
+		}
+		spin_unlock_irq(&conf->device_lock);
+	}
+
+	new_sector = raid5_compute_sector(conf, logical_sector, previous,
+					  &dd_idx, NULL);
+	pr_debug("raid456: %s, sector %llu logical %llu\n", __func__,
+		 new_sector, logical_sector);
+
+	sh = raid5_get_active_stripe(conf, new_sector, previous,
+				     (bi->bi_opf & REQ_RAHEAD), 0);
+	if (unlikely(!sh)) {
+		/* cannot get stripe, just give-up */
+		bi->bi_status = BLK_STS_IOERR;
+		return STRIPE_FAIL;
+	}
+
+	if (unlikely(previous)) {
+		/*
+		 * Expansion might have moved on while waiting for a
+		 * stripe, so we must do the range check again.
+		 * Expansion could still move past after this
+		 * test, but as we are holding a reference to
+		 * 'sh', we know that if that happens,
+		 * STRIPE_EXPANDING will get set and the expansion
+		 * won't proceed until we finish with the stripe.
+		 */
+		int must_retry = 0;
+		spin_lock_irq(&conf->device_lock);
+		if (!ahead_of_reshape(mddev, logical_sector,
+				      conf->reshape_progress))
+			/* mismatch, need to try again */
+			must_retry = 1;
+		spin_unlock_irq(&conf->device_lock);
+		if (must_retry) {
+			ret = STRIPE_SCHEDULE_AND_RETRY;
+			goto out_release;
+		}
+	}
+
+	if (read_seqcount_retry(&conf->gen_lock, seq)) {
+		/* Might have got the wrong stripe_head by accident */
+		ret = STRIPE_RETRY;
+		goto out_release;
+	}
+
+	if (test_bit(STRIPE_EXPANDING, &sh->state) ||
+	    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
+		/*
+		 * Stripe is busy expanding or add failed due to
+		 * overlap. Flush everything and wait a while.
+		 */
+		md_wakeup_thread(mddev->thread);
+		ret = STRIPE_SCHEDULE_AND_RETRY;
+		goto out_release;
+	}
+
+	if (stripe_can_batch(sh))
+		stripe_add_to_batch_list(conf, sh);
+
+	if (ctx->do_flush) {
+		set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
+		/* we only need flush for one stripe */
+		ctx->do_flush = false;
+	}
+
+	set_bit(STRIPE_HANDLE, &sh->state);
+	clear_bit(STRIPE_DELAYED, &sh->state);
+	if ((!sh->batch_head || sh == sh->batch_head) &&
+	    (bi->bi_opf & REQ_SYNC) &&
+	    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+		atomic_inc(&conf->preread_active_stripes);
+
+	release_stripe_plug(mddev, sh);
+	return STRIPE_SUCCESS;
+
+out_release:
+	raid5_release_stripe(sh);
+	return ret;
+}
+
 static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
 	struct r5conf *conf = mddev->private;
-	int dd_idx;
-	sector_t new_sector;
 	sector_t logical_sector, last_sector;
-	struct stripe_head *sh;
+	struct stripe_request_ctx ctx = {};
 	const int rw = bio_data_dir(bi);
+	enum stripe_result res;
 	DEFINE_WAIT(w);
 	bool do_prepare;
-	bool do_flush = false;
 
 	if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
 		int ret = log_handle_flush_request(conf, bi);
@@ -5813,7 +5935,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 		 * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
 		 * we need to flush journal device
 		 */
-		do_flush = bi->bi_opf & REQ_PREFLUSH;
+		ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
 	}
 
 	if (!md_write_start(mddev, bi))
@@ -5853,117 +5975,30 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	md_account_bio(mddev, &bi);
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
-		int previous;
 		int seq;
 
 		do_prepare = false;
 	retry:
 		seq = read_seqcount_begin(&conf->gen_lock);
-		previous = 0;
 		if (do_prepare)
 			prepare_to_wait(&conf->wait_for_overlap, &w,
 				TASK_UNINTERRUPTIBLE);
-		if (unlikely(conf->reshape_progress != MaxSector)) {
-			/* spinlock is needed as reshape_progress may be
-			 * 64bit on a 32bit platform, and so it might be
-			 * possible to see a half-updated value
-			 * Of course reshape_progress could change after
-			 * the lock is dropped, so once we get a reference
-			 * to the stripe that we think it is, we will have
-			 * to check again.
-			 */
-			spin_lock_irq(&conf->device_lock);
-			if (ahead_of_reshape(mddev, logical_sector,
-					     conf->reshape_progress)) {
-				previous = 1;
-			} else {
-				if (ahead_of_reshape(mddev, logical_sector,
-						     conf->reshape_safe)) {
-					spin_unlock_irq(&conf->device_lock);
-					schedule();
-					do_prepare = true;
-					goto retry;
-				}
-			}
-			spin_unlock_irq(&conf->device_lock);
-		}
-
-		new_sector = raid5_compute_sector(conf, logical_sector,
-						  previous,
-						  &dd_idx, NULL);
-		pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
-			(unsigned long long)new_sector,
-			(unsigned long long)logical_sector);
-		sh = raid5_get_active_stripe(conf, new_sector, previous,
-				       (bi->bi_opf & REQ_RAHEAD), 0);
-		if (unlikely(!sh)) {
-			/* cannot get stripe, just give-up */
-			bi->bi_status = BLK_STS_IOERR;
+		res = make_stripe_request(mddev, conf, &ctx, logical_sector,
+					  bi, seq);
+		if (res == STRIPE_FAIL)
 			break;
-		}
-
-		if (unlikely(previous)) {
-			/* expansion might have moved on while waiting for a
-			 * stripe, so we must do the range check again.
-			 * Expansion could still move past after this
-			 * test, but as we are holding a reference to
-			 * 'sh', we know that if that happens,
-			 * STRIPE_EXPANDING will get set and the expansion
-			 * won't proceed until we finish with the stripe.
-			 */
-			int must_retry = 0;
-			spin_lock_irq(&conf->device_lock);
-			if (!ahead_of_reshape(mddev, logical_sector,
-					      conf->reshape_progress))
-				/* mismatch, need to try again */
-				must_retry = 1;
-			spin_unlock_irq(&conf->device_lock);
-			if (must_retry) {
-				raid5_release_stripe(sh);
-				schedule();
-				do_prepare = true;
-				goto retry;
-			}
-		}
-		if (read_seqcount_retry(&conf->gen_lock, seq)) {
-			/* Might have got the wrong stripe_head by accident */
-			raid5_release_stripe(sh);
+		if (res == STRIPE_RETRY)
 			goto retry;
-		}
-
-		if (test_bit(STRIPE_EXPANDING, &sh->state) ||
-		    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
-			/*
-			 * Stripe is busy expanding or add failed due to
-			 * overlap. Flush everything and wait a while.
-			 */
-			md_wakeup_thread(mddev->thread);
-			raid5_release_stripe(sh);
+		if (res == STRIPE_SCHEDULE_AND_RETRY) {
 			schedule();
 			do_prepare = true;
 			goto retry;
 		}
-
-		if (stripe_can_batch(sh))
-			stripe_add_to_batch_list(conf, sh);
-
-		if (do_flush) {
-			set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
-			/* we only need flush for one stripe */
-			do_flush = false;
-		}
-
-		set_bit(STRIPE_HANDLE, &sh->state);
-		clear_bit(STRIPE_DELAYED, &sh->state);
-		if ((!sh->batch_head || sh == sh->batch_head) &&
-		    (bi->bi_opf & REQ_SYNC) &&
-		    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-			atomic_inc(&conf->preread_active_stripes);
-
-		release_stripe_plug(mddev, sh);
 	}
+
 	finish_wait(&conf->wait_for_overlap, &w);
 
 	if (rw == WRITE)
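The heart of this refactor is that make_stripe_request() now reports *what* happened through enum stripe_result, while the caller's loop keeps deciding *how* to respond: the helper never sleeps or jumps, so schedule() and the wait_for_overlap/seqcount bookkeeping stay in raid5_make_request(), which owns that state. The standalone sketch below illustrates that dispatch pattern outside the kernel. Only the STRIPE_* names come from the patch; fake_make_stripe_request(), the retry budget, and sched_yield() standing in for schedule() are invented here for illustration.

/*
 * Standalone sketch (not kernel code) of the result-code dispatch
 * pattern this patch introduces: a helper classifies the outcome,
 * the caller's loop owns the retry policy.
 */
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

enum stripe_result {
	STRIPE_SUCCESS = 0,
	STRIPE_RETRY,              /* raced with a generation change: retry at once */
	STRIPE_SCHEDULE_AND_RETRY, /* stripe busy: yield the CPU, then retry */
	STRIPE_FAIL,               /* unrecoverable: abort the whole request */
};

/* Stand-in for make_stripe_request(): picks a pseudo-random outcome. */
static enum stripe_result fake_make_stripe_request(long sector)
{
	(void)sector;
	return (enum stripe_result)(rand() % 4);
}

int main(void)
{
	srand(42);

	for (long sector = 0; sector < 8; sector++) {
		enum stripe_result res = STRIPE_RETRY;

		/* Bound the retries so the sketch always terminates. */
		for (int attempt = 0; attempt < 100; attempt++) {
			res = fake_make_stripe_request(sector);
			if (res == STRIPE_SUCCESS || res == STRIPE_FAIL)
				break;
			if (res == STRIPE_SCHEDULE_AND_RETRY)
				sched_yield();	/* toy analogue of schedule() */
			/* STRIPE_RETRY loops again immediately */
		}

		if (res == STRIPE_FAIL) {
			printf("sector %ld: I/O error, aborting request\n", sector);
			break;
		}
		printf("sector %ld: %s\n", sector,
		       res == STRIPE_SUCCESS ? "submitted" : "gave up after retries");
	}
	return 0;
}

Keeping the policy in the caller is what lets the helper simply drop its stripe reference at out_release and return on any conflict, instead of sleeping while holding state it does not own.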