From a63a5cf84dac7a23a57c800eea5734701e7d3c04 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 1 Apr 2011 21:02:31 +0200
Subject: dm: improve block integrity support

The current block integrity (DIF/DIX) support in DM is verifying that
all devices' integrity profiles match during DM device resume (which
is past the point of no return).  To some degree that is unavoidable
(stacked DM devices force this late checking).  But for most DM
devices (which aren't stacking on other DM devices) the ideal time to
verify all integrity profiles match is during table load.

Introduce the notion of an "initialized" integrity profile: a profile
that was blk_integrity_register()'d with a non-NULL 'blk_integrity'
template.  Add blk_integrity_is_initialized() to allow checking if a
profile was initialized.

Update DM integrity support to:
- check all devices with _initialized_ integrity profiles match
  during table load; uninitialized profiles (e.g. for underlying DM
  device(s) of a stacked DM device) are ignored.
- disallow a table load that would result in an integrity profile that
  conflicts with a DM device's existing (in-use) integrity profile
- avoid clearing an existing integrity profile
- validate all integrity profiles match during resume; but if they
  don't, all we can do is report the mismatch (during resume we're past
  the point of no return)

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 include/linux/blkdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 16a902f099ac..32176cc8e715 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1206,6 +1206,7 @@ struct blk_integrity {
 	struct kobject		kobj;
 };
 
+extern bool blk_integrity_is_initialized(struct gendisk *);
 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
 extern void blk_integrity_unregister(struct gendisk *);
 extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
@@ -1262,6 +1263,7 @@ queue_max_integrity_segments(struct request_queue *q)
 #define queue_max_integrity_segments(a)		(0)
 #define blk_integrity_merge_rq(a, b, c)		(0)
 #define blk_integrity_merge_bio(a, b, c)	(0)
+#define blk_integrity_is_initialized(a)		(0)
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit 


From f75664570d8b75469cc468f23c2b27220984983b Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Tue, 12 Apr 2011 10:17:31 +0200
Subject: block: add callback function for unplug notification

MD would like to know when a queue is unplugged, so it can flush
its bitmap writes. Add such a callback.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c       |  3 +++
 block/blk-settings.c   | 16 ++++++++++++++++
 include/linux/blkdev.h |  3 +++
 3 files changed, 22 insertions(+)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index 0c0ea10e61ea..76850fc9cf23 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2672,6 +2672,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth)
 {
 	trace_block_unplug_io(q, depth);
 	__blk_run_queue(q, false);
+
+	if (q->unplugged_fn)
+		q->unplugged_fn(q);
 }
 
 static void flush_plug_list(struct blk_plug *plug)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1fa769293597..eb949045bb12 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -790,6 +790,22 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+/**
+ * blk_queue_unplugged - register a callback for an unplug event
+ * @q:		the request queue for the device
+ * @fn:		the function to call
+ *
+ * Some stacked drivers may need to know when IO is dispatched on an
+ * unplug event. By registrering a callback here, they will be notified
+ * when someone flushes their on-stack queue plug. The function will be
+ * called with the queue lock held.
+ */
+void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn)
+{
+	q->unplugged_fn = fn;
+}
+EXPORT_SYMBOL(blk_queue_unplugged);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 32176cc8e715..c07ffafac5d4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -196,6 +196,7 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
+typedef void (unplugged_fn) (struct request_queue *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -283,6 +284,7 @@ struct request_queue
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	unplugged_fn		*unplugged_fn;
 
 	/*
 	 * Dispatch queue sorting
@@ -841,6 +843,7 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
+extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-- 
cgit 


From 88b996cd0652280cc9b9fc70008fda15f14175e1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 15 Apr 2011 15:20:10 +0200
Subject: block: cleanup the block plug helper functions

It's a bit of a mess currently. task->plug is being cleared
and reset in __blk_finish_plug(), and blk_finish_plug() is
testing for a NULL plug which cannot happen even from schedule()
anymore since it uses blk_needs_flush_plug() to determine
whether to call into this function at all.

So get rid of some of the cruft.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c       | 24 ++++++------------------
 include/linux/blkdev.h |  6 +++---
 2 files changed, 9 insertions(+), 21 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index 36b1a7559f94..b598fa7720d4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2671,7 +2671,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth)
 		q->unplugged_fn(q);
 }
 
-static void flush_plug_list(struct blk_plug *plug)
+void blk_flush_plug_list(struct blk_plug *plug)
 {
 	struct request_queue *q;
 	unsigned long flags;
@@ -2733,28 +2733,16 @@ static void flush_plug_list(struct blk_plug *plug)
 
 	local_irq_restore(flags);
 }
-
-static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
-	flush_plug_list(plug);
-
-	if (plug == tsk->plug)
-		tsk->plug = NULL;
-}
+EXPORT_SYMBOL(blk_flush_plug_list);
 
 void blk_finish_plug(struct blk_plug *plug)
 {
-	if (plug)
-		__blk_finish_plug(current, plug);
-}
-EXPORT_SYMBOL(blk_finish_plug);
+	blk_flush_plug_list(plug);
 
-void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
-	__blk_finish_plug(tsk, plug);
-	tsk->plug = plug;
+	if (plug == current->plug)
+		current->plug = NULL;
 }
-EXPORT_SYMBOL(__blk_flush_plug);
+EXPORT_SYMBOL(blk_finish_plug);
 
 int __init blk_dev_init(void)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c07ffafac5d4..ffe48ff318f9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,14 +865,14 @@ struct blk_plug {
 
 extern void blk_start_plug(struct blk_plug *);
 extern void blk_finish_plug(struct blk_plug *);
-extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+extern void blk_flush_plug_list(struct blk_plug *);
 
 static inline void blk_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
-	if (unlikely(plug))
-		__blk_flush_plug(tsk, plug);
+	if (plug)
+		blk_flush_plug_list(plug);
 }
 
 static inline bool blk_needs_flush_plug(struct task_struct *tsk)
-- 
cgit 


From f6603783f9f099bf7a83b3f6c689bbbf74f0e96e Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Fri, 15 Apr 2011 15:49:07 +0200
Subject: block: only force kblockd unplugging from the schedule() path

For the explicit unplugging, we'd prefer to kick things off
immediately and not pay the penalty of the latency to switch
to kblockd. So let blk_finish_plug() do the run inline, while
the implicit-on-schedule-out unplug will punt to kblockd.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c       | 13 +++++++------
 include/linux/blkdev.h |  4 ++--
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index b598fa7720d4..3c8121072507 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2662,16 +2662,17 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return !(rqa->q <= rqb->q);
 }
 
-static void queue_unplugged(struct request_queue *q, unsigned int depth)
+static void queue_unplugged(struct request_queue *q, unsigned int depth,
+			    bool force_kblockd)
 {
 	trace_block_unplug_io(q, depth);
-	__blk_run_queue(q, true);
+	__blk_run_queue(q, force_kblockd);
 
 	if (q->unplugged_fn)
 		q->unplugged_fn(q);
 }
 
-void blk_flush_plug_list(struct blk_plug *plug)
+void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd)
 {
 	struct request_queue *q;
 	unsigned long flags;
@@ -2706,7 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug)
 		BUG_ON(!rq->q);
 		if (rq->q != q) {
 			if (q) {
-				queue_unplugged(q, depth);
+				queue_unplugged(q, depth, force_kblockd);
 				spin_unlock(q->queue_lock);
 			}
 			q = rq->q;
@@ -2727,7 +2728,7 @@ void blk_flush_plug_list(struct blk_plug *plug)
 	}
 
 	if (q) {
-		queue_unplugged(q, depth);
+		queue_unplugged(q, depth, force_kblockd);
 		spin_unlock(q->queue_lock);
 	}
 
@@ -2737,7 +2738,7 @@ EXPORT_SYMBOL(blk_flush_plug_list);
 
 void blk_finish_plug(struct blk_plug *plug)
 {
-	blk_flush_plug_list(plug);
+	blk_flush_plug_list(plug, false);
 
 	if (plug == current->plug)
 		current->plug = NULL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ffe48ff318f9..1c76506fcf11 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,14 +865,14 @@ struct blk_plug {
 
 extern void blk_start_plug(struct blk_plug *);
 extern void blk_finish_plug(struct blk_plug *);
-extern void blk_flush_plug_list(struct blk_plug *);
+extern void blk_flush_plug_list(struct blk_plug *, bool);
 
 static inline void blk_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
 	if (plug)
-		blk_flush_plug_list(plug);
+		blk_flush_plug_list(plug, true);
 }
 
 static inline bool blk_needs_flush_plug(struct task_struct *tsk)
-- 
cgit 


From a237c1c5bc5dc5c76a21be922dca4826f3eca8ca Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Sat, 16 Apr 2011 13:27:55 +0200
Subject: block: let io_schedule() flush the plug inline

Linus correctly observes that the most important dispatch cases
are now done from kblockd; this isn't ideal for latency reasons.
The original reason for switching dispatches out-of-line was to
avoid too deep a stack, so by _only_ letting the "accidental"
flush directly in schedule() be guarded by offload to kblockd,
we should be able to get the best of both worlds.

So add a blk_schedule_flush_plug() that offloads to kblockd,
and only use that from the schedule() path.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 include/linux/blkdev.h | 13 +++++++++++++
 kernel/sched.c         |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1c76506fcf11..ec0357d8c4a5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -871,6 +871,14 @@ static inline void blk_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
+	if (plug)
+		blk_flush_plug_list(plug, false);
+}
+
+static inline void blk_schedule_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
 	if (plug)
 		blk_flush_plug_list(plug, true);
 }
@@ -1317,6 +1325,11 @@ static inline void blk_flush_plug(struct task_struct *task)
 {
 }
 
+static inline void blk_schedule_flush_plug(struct task_struct *task)
+{
+}
+
+
 static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
 	return false;
diff --git a/kernel/sched.c b/kernel/sched.c
index a187c3fe027b..312f8b95c2d4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4118,7 +4118,7 @@ need_resched:
 			 */
 			if (blk_needs_flush_plug(prev)) {
 				raw_spin_unlock(&rq->lock);
-				blk_flush_plug(prev);
+				blk_schedule_flush_plug(prev);
 				raw_spin_lock(&rq->lock);
 			}
 		}
-- 
cgit 


From 048c9374a749a27f16493cea033fa4a8ff492356 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 18 Apr 2011 09:52:22 +0200
Subject: block: Enhance new plugging support to support general callbacks

md/raid requires an unplug callback, but as it does not use
requests the current code cannot provide one.

So allow arbitrary callbacks to be attached to the blk_plug.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c       | 20 ++++++++++++++++++++
 include/linux/blkdev.h |  7 ++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index 78b7b0cb7216..77edf0512338 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2638,6 +2638,7 @@ void blk_start_plug(struct blk_plug *plug)
 
 	plug->magic = PLUG_MAGIC;
 	INIT_LIST_HEAD(&plug->list);
+	INIT_LIST_HEAD(&plug->cb_list);
 	plug->should_sort = 0;
 
 	/*
@@ -2678,6 +2679,24 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 		q->unplugged_fn(q);
 }
 
+static void flush_plug_callbacks(struct blk_plug *plug)
+{
+	LIST_HEAD(callbacks);
+
+	if (list_empty(&plug->cb_list))
+		return;
+
+	list_splice_init(&plug->cb_list, &callbacks);
+
+	while (!list_empty(&callbacks)) {
+		struct blk_plug_cb *cb = list_first_entry(&callbacks,
+							  struct blk_plug_cb,
+							  list);
+		list_del(&cb->list);
+		cb->callback(cb);
+	}
+}
+
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
@@ -2688,6 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
 	BUG_ON(plug->magic != PLUG_MAGIC);
 
+	flush_plug_callbacks(plug);
 	if (list_empty(&plug->list))
 		return;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ec0357d8c4a5..f3f7879391a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -860,8 +860,13 @@ extern void blk_put_queue(struct request_queue *);
 struct blk_plug {
 	unsigned long magic;
 	struct list_head list;
+	struct list_head cb_list;
 	unsigned int should_sort;
 };
+struct blk_plug_cb {
+	struct list_head list;
+	void (*callback)(struct blk_plug_cb *);
+};
 
 extern void blk_start_plug(struct blk_plug *);
 extern void blk_finish_plug(struct blk_plug *);
@@ -887,7 +892,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
-	return plug && !list_empty(&plug->list);
+	return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
 }
 
 /*
-- 
cgit 


From b4cb290e0a7d19235bd075c2ad4d60dbab0bac15 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Mon, 18 Apr 2011 09:54:05 +0200
Subject: Revert "block: add callback function for unplug notification"

MD can't use this since it really requires us to be able to
keep more than a single piece of state for the unplug. Commit
048c9374 added the required support for MD, so get rid of this
now unused code.

This reverts commit f75664570d8b75469cc468f23c2b27220984983b.

Conflicts:

	block/blk-core.c

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c       |  3 ---
 block/blk-settings.c   | 16 ----------------
 include/linux/blkdev.h |  3 ---
 3 files changed, 22 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index 77edf0512338..09b262811fff 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2674,9 +2674,6 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 {
 	trace_block_unplug(q, depth, !from_schedule);
 	__blk_run_queue(q, from_schedule);
-
-	if (q->unplugged_fn)
-		q->unplugged_fn(q);
 }
 
 static void flush_plug_callbacks(struct blk_plug *plug)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index eb949045bb12..1fa769293597 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -790,22 +790,6 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
-/**
- * blk_queue_unplugged - register a callback for an unplug event
- * @q:		the request queue for the device
- * @fn:		the function to call
- *
- * Some stacked drivers may need to know when IO is dispatched on an
- * unplug event. By registrering a callback here, they will be notified
- * when someone flushes their on-stack queue plug. The function will be
- * called with the queue lock held.
- */
-void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn)
-{
-	q->unplugged_fn = fn;
-}
-EXPORT_SYMBOL(blk_queue_unplugged);
-
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3f7879391a7..3448d89297e8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
-typedef void (unplugged_fn) (struct request_queue *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -284,7 +283,6 @@ struct request_queue
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
-	unplugged_fn		*unplugged_fn;
 
 	/*
 	 * Dispatch queue sorting
@@ -843,7 +841,6 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
-extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-- 
cgit 


From 24ecfbe27f65563909b14492afda2f1c21f7c044 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 18 Apr 2011 11:41:33 +0200
Subject: block: add blk_run_queue_async

Instead of overloading __blk_run_queue to force an offload to kblockd
add a new blk_run_queue_async helper to do it explicitly.  I've kept
the blk_queue_stopped check for now, but I suspect it's not needed
as the check we do when the workqueue item runs should be enough.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c                 | 36 ++++++++++++++++++++++++------------
 block/blk-exec.c                 |  2 +-
 block/blk-flush.c                |  4 ++--
 block/blk.h                      |  1 +
 block/cfq-iosched.c              |  6 +++---
 block/elevator.c                 |  4 ++--
 drivers/scsi/scsi_lib.c          |  2 +-
 drivers/scsi/scsi_transport_fc.c |  2 +-
 include/linux/blkdev.h           |  2 +-
 9 files changed, 36 insertions(+), 23 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index e2bacfa46cc3..5fa3dd2705c6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -204,7 +204,7 @@ static void blk_delay_work(struct work_struct *work)
 
 	q = container_of(work, struct request_queue, delay_work.work);
 	spin_lock_irq(q->queue_lock);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irq(q->queue_lock);
 }
 
@@ -239,7 +239,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -296,11 +296,9 @@ EXPORT_SYMBOL(blk_sync_queue);
  *
  * Description:
  *    See @blk_run_queue. This variant must be called with the queue lock
- *    held and interrupts disabled. If force_kblockd is true, then it is
- *    safe to call this without holding the queue lock.
- *
+ *    held and interrupts disabled.
  */
-void __blk_run_queue(struct request_queue *q, bool force_kblockd)
+void __blk_run_queue(struct request_queue *q)
 {
 	if (unlikely(blk_queue_stopped(q)))
 		return;
@@ -309,7 +307,7 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
 		q->request_fn(q);
 		queue_flag_clear(QUEUE_FLAG_REENTER, q);
 	} else
@@ -317,6 +315,20 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
+/**
+ * blk_run_queue_async - run a single device queue in workqueue context
+ * @q:	The queue to run
+ *
+ * Description:
+ *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
+ *    of us.
+ */
+void blk_run_queue_async(struct request_queue *q)
+{
+	if (likely(!blk_queue_stopped(q)))
+		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+}
+
 /**
  * blk_run_queue - run a single device queue
  * @q: The queue to run
@@ -330,7 +342,7 @@ void blk_run_queue(struct request_queue *q)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -979,7 +991,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 		blk_queue_end_tag(q, rq);
 
 	add_acct_request(q, rq, where);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
@@ -1323,7 +1335,7 @@ get_rq:
 	} else {
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 out_unlock:
 		spin_unlock_irq(q->queue_lock);
 	}
@@ -2684,9 +2696,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 	 */
 	if (from_schedule) {
 		spin_unlock(q->queue_lock);
-		__blk_run_queue(q, true);
+		blk_run_queue_async(q);
 	} else {
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 		spin_unlock(q->queue_lock);
 	}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 7482b7fa863b..81e31819a597 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
 	__elv_add_request(q, rq, where);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	/* the queue is stopped so it won't be plugged+unplugged */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index eba4a2790c6c..6c9b5e189e62 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * request_fn may confuse the driver.  Always use kblockd.
 	 */
 	if (queued)
-		__blk_run_queue(q, true);
+		blk_run_queue_async(q);
 }
 
 /**
@@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error)
 	 * the comment in flush_end_io().
 	 */
 	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
-		__blk_run_queue(q, true);
+		blk_run_queue_async(q);
 }
 
 /**
diff --git a/block/blk.h b/block/blk.h
index 61263463e38e..c9df8fc3c999 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -22,6 +22,7 @@ void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
 void __generic_unplug_device(struct request_queue *);
+void blk_run_queue_async(struct request_queue *q);
 
 /*
  * Internal atomic flags for request handling
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3be881ec95ad..46b0a1d1d925 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3368,7 +3368,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			    cfqd->busy_queues > 1) {
 				cfq_del_timer(cfqd, cfqq);
 				cfq_clear_cfqq_wait_request(cfqq);
-				__blk_run_queue(cfqd->queue, false);
+				__blk_run_queue(cfqd->queue);
 			} else {
 				cfq_blkiocg_update_idle_time_stats(
 						&cfqq->cfqg->blkg);
@@ -3383,7 +3383,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		__blk_run_queue(cfqd->queue, false);
+		__blk_run_queue(cfqd->queue);
 	}
 }
 
@@ -3743,7 +3743,7 @@ static void cfq_kick_queue(struct work_struct *work)
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
-	__blk_run_queue(cfqd->queue, false);
+	__blk_run_queue(cfqd->queue);
 	spin_unlock_irq(q->queue_lock);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index 0cdb4e7ebab4..6f6abc08bb56 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q)
 	 */
 	elv_drain_elevator(q);
 	while (q->rq.elvpriv) {
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 		spin_unlock_irq(q->queue_lock);
 		msleep(10);
 		spin_lock_irq(q->queue_lock);
@@ -695,7 +695,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 *   with anything.  There's no point in delaying queue
 		 *   processing.
 		 */
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT_MERGE:
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6d5c7ff43f5b..ab55c2fa7ce2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q)
 					&sdev->request_queue->queue_flags);
 		if (flagset)
 			queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
-		__blk_run_queue(sdev->request_queue, false);
+		__blk_run_queue(sdev->request_queue);
 		if (flagset)
 			queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
 		spin_unlock(sdev->request_queue->queue_lock);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index fdf3fa639056..28c33506e4ad 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport)
 		  !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
 	if (flagset)
 		queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
-	__blk_run_queue(rport->rqst_q, false);
+	__blk_run_queue(rport->rqst_q);
 	if (flagset)
 		queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
 	spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3448d89297e8..cbbfd98ad4a3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -697,7 +697,7 @@ extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
+extern void __blk_run_queue(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
-- 
cgit 


From c21e6beba8835d09bb80e34961430b13e60381c5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Tue, 19 Apr 2011 13:32:46 +0200
Subject: block: get rid of QUEUE_FLAG_REENTER

We are currently using this flag to check whether it's safe
to call into ->request_fn(). If it is set, we punt to kblockd.
But we get a lot of false positives and excessive punts to
kblockd, which hurts performance.

The only real abuser of this infrastructure is SCSI. So export
the async queue run and convert SCSI over to use that. There's
room for improvement in that SCSI need not always use the async
call, but this fixes our performance issue and they can fix that
up in due time.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c                 | 11 ++---------
 block/blk.h                      |  1 -
 drivers/scsi/scsi_lib.c          | 17 +----------------
 drivers/scsi/scsi_transport_fc.c | 19 ++++---------------
 include/linux/blkdev.h           | 26 +++++++++++++-------------
 5 files changed, 20 insertions(+), 54 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index 580eee5743e5..40725b9091f1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -303,15 +303,7 @@ void __blk_run_queue(struct request_queue *q)
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	/*
-	 * Only recurse once to avoid overrunning the stack, let the unplug
-	 * handling reinvoke the handler shortly if we already got there.
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else
-		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -328,6 +320,7 @@ void blk_run_queue_async(struct request_queue *q)
 	if (likely(!blk_queue_stopped(q)))
 		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
+EXPORT_SYMBOL(blk_run_queue_async);
 
 /**
  * blk_run_queue - run a single device queue
diff --git a/block/blk.h b/block/blk.h
index c9df8fc3c999..61263463e38e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -22,7 +22,6 @@ void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
 void __generic_unplug_device(struct request_queue *);
-void blk_run_queue_async(struct request_queue *q);
 
 /*
  * Internal atomic flags for request handling
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ab55c2fa7ce2..e9901b8f8443 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -411,8 +411,6 @@ static void scsi_run_queue(struct request_queue *q)
 	list_splice_init(&shost->starved_list, &starved_list);
 
 	while (!list_empty(&starved_list)) {
-		int flagset;
-
 		/*
 		 * As long as shost is accepting commands and we have
 		 * starved queues, call blk_run_queue. scsi_request_fn
@@ -435,20 +433,7 @@ static void scsi_run_queue(struct request_queue *q)
 			continue;
 		}
 
-		spin_unlock(shost->host_lock);
-
-		spin_lock(sdev->request_queue->queue_lock);
-		flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-				!test_bit(QUEUE_FLAG_REENTER,
-					&sdev->request_queue->queue_flags);
-		if (flagset)
-			queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
-		__blk_run_queue(sdev->request_queue);
-		if (flagset)
-			queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
-		spin_unlock(sdev->request_queue->queue_lock);
-
-		spin_lock(shost->host_lock);
+		blk_run_queue_async(sdev->request_queue);
 	}
 	/* put any unprocessed entries back */
 	list_splice(&starved_list, &shost->starved_list);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 28c33506e4ad..815069d13f9b 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3816,28 +3816,17 @@ fail_host_msg:
 static void
 fc_bsg_goose_queue(struct fc_rport *rport)
 {
-	int flagset;
-	unsigned long flags;
-
 	if (!rport->rqst_q)
 		return;
 
+	/*
+	 * This get/put dance makes no sense
+	 */
 	get_device(&rport->dev);
-
-	spin_lock_irqsave(rport->rqst_q->queue_lock, flags);
-	flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
-		  !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
-	if (flagset)
-		queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
-	__blk_run_queue(rport->rqst_q);
-	if (flagset)
-		queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
-	spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
-
+	blk_run_queue_async(rport->rqst_q);
 	put_device(&rport->dev);
 }
 
-
 /**
  * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD
  * @q:		rport request queue
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cbbfd98ad4a3..2ad95fa1d130 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -388,20 +388,19 @@ struct request_queue
 #define	QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
-#define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
-#define QUEUE_FLAG_ELVSWITCH	7	/* don't use elevator, just do FIFO */
-#define QUEUE_FLAG_BIDI		8	/* queue supports bidi requests */
-#define QUEUE_FLAG_NOMERGES     9	/* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP   10	/* force complete on same CPU */
-#define QUEUE_FLAG_FAIL_IO     11	/* fake timeout */
-#define QUEUE_FLAG_STACKABLE   12	/* supports request stacking */
-#define QUEUE_FLAG_NONROT      13	/* non-rotational device (SSD) */
+#define QUEUE_FLAG_ELVSWITCH	6	/* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES     8	/* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP	9	/* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     10	/* fake timeout */
+#define QUEUE_FLAG_STACKABLE   11	/* supports request stacking */
+#define QUEUE_FLAG_NONROT      12	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
-#define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
-#define QUEUE_FLAG_NOXMERGES   17	/* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM  18	/* Contributes to random pool */
-#define QUEUE_FLAG_SECDISCARD  19	/* supports SECDISCARD */
+#define QUEUE_FLAG_IO_STAT     13	/* do IO stats */
+#define QUEUE_FLAG_DISCARD     14	/* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES   15	/* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM  16	/* Contributes to random pool */
+#define QUEUE_FLAG_SECDISCARD  17	/* supports SECDISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -699,6 +698,7 @@ extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
 extern void __blk_run_queue(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
+extern void blk_run_queue_async(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
-- 
cgit 


From f3876930952390a31c3a7fd68dd621464a36eb80 Mon Sep 17 00:00:00 2001
From: "shaohua.li@intel.com" <shaohua.li@intel.com>
Date: Fri, 6 May 2011 11:34:32 -0600
Subject: block: add a non-queueable flush flag

Flush requests aren't queueable on some drives. Add a flag to let the
driver notify the block layer about this. We can optimize flush
performance with this knowledge.

Stable: 2.6.39 only

Cc: stable@kernel.org
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-settings.c   | 6 ++++++
 include/linux/blkdev.h | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1fa769293597..cd3c428e194f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -790,6 +790,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
+{
+	q->flush_not_queueable = !queueable;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cbbfd98ad4a3..8bd2a271b2d8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -364,6 +364,7 @@ struct request_queue
 	 * for flush operations
 	 */
 	unsigned int		flush_flags;
+	unsigned int		flush_not_queueable:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
@@ -843,6 +844,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
+extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
@@ -1111,6 +1113,11 @@ static inline unsigned int block_size(struct block_device *bdev)
 	return bdev->bd_block_size;
 }
 
+static inline bool queue_flush_queueable(struct request_queue *q)
+{
+	return !q->flush_not_queueable;
+}
+
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
-- 
cgit 


From 3ac0cc4508709d42ec9aa351086c7d38bfc0660c Mon Sep 17 00:00:00 2001
From: "shaohua.li@intel.com" <shaohua.li@intel.com>
Date: Fri, 6 May 2011 11:34:41 -0600
Subject: block: hold queue if flush is running for non-queueable flush drive

On some drives, flush requests are non-queueable. While a flush request
is running, normal read/write requests can't run. If the block layer
dispatches such a request, the driver can't handle it and requeues it.
Tejun suggested we hold the queue while a flush is running. This avoids
unnecessary requeues and can also improve performance. For example, say
we have requests flush1, write1, flush2. flush1 is dispatched, then the
queue is held, so write1 isn't inserted into the queue. After flush1
finishes, flush2 is dispatched. Since the disk cache is already clean,
flush2 finishes very quickly, so it looks like flush2 is folded into
flush1.

In my test, the queue holding completely solves a regression introduced by
commit 53d63e6b0dfb95882ec0219ba6bbd50cde423794:

    block: make the flush insertion use the tail of the dispatch list

    It's not a preempt type request, in fact we have to insert it
    behind requests that do specify INSERT_FRONT.

which causes about 20% regression running a sysbench fileio
workload.

Stable: 2.6.39 only

Cc: stable@kernel.org
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c      | 16 +++++++++++-----
 block/blk.h            | 21 ++++++++++++++++++++-
 include/linux/blkdev.h |  1 +
 3 files changed, 32 insertions(+), 6 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6c9b5e189e62..bb21e4c36f70 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error)
 	}
 
 	/*
-	 * Moving a request silently to empty queue_head may stall the
-	 * queue.  Kick the queue in those cases.  This function is called
-	 * from request completion path and calling directly into
-	 * request_fn may confuse the driver.  Always use kblockd.
+	 * Kick the queue to avoid stall for two cases:
+	 * 1. Moving a request silently to empty queue_head may stall the
+	 * queue.
+	 * 2. When flush request is running in non-queueable queue, the
+	 * queue is hold. Restart the queue after flush request is finished
+	 * to avoid stall.
+	 * This function is called from request completion path and calling
+	 * directly into request_fn may confuse the driver.  Always use
+	 * kblockd.
 	 */
-	if (queued)
+	if (queued || q->flush_queue_delayed)
 		blk_run_queue_async(q);
+	q->flush_queue_delayed = 0;
 }
 
 /**
diff --git a/block/blk.h b/block/blk.h
index c9df8fc3c999..83e4bff36201 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,26 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			rq = list_entry_rq(q->queue_head.next);
 			return rq;
 		}
-
+		/*
+		 * Flush request is running and flush request isn't queueable
+		 * in the drive, we can hold the queue till flush request is
+		 * finished. Even we don't do this, driver can't dispatch next
+		 * requests and will requeue them. And this can improve
+		 * throughput too. For example, we have request flush1, write1,
+		 * flush 2. flush1 is dispatched, then queue is hold, write1
+		 * isn't inserted to queue. After flush1 is finished, flush2
+		 * will be dispatched. Since disk cache is already clean,
+		 * flush2 will be finished very soon, so looks like flush2 is
+		 * folded to flush1.
+		 * Since the queue is hold, a flag is set to indicate the queue
+		 * should be restarted later. Please see flush_end_io() for
+		 * details.
+		 */
+		if (q->flush_pending_idx != q->flush_running_idx &&
+				!queue_flush_queueable(q)) {
+			q->flush_queue_delayed = 1;
+			return NULL;
+		}
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8bd2a271b2d8..9f921bf4bf8c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -365,6 +365,7 @@ struct request_queue
 	 */
 	unsigned int		flush_flags;
 	unsigned int		flush_not_queueable:1;
+	unsigned int		flush_queue_delayed:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
-- 
cgit 


From a934a00a69e940b126b9bdbf83e630ef5fe43523 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 18 May 2011 10:37:35 +0200
Subject: block: Fix discard topology stacking and reporting

In some cases we would end up stacking discard_zeroes_data (dzd)
incorrectly. Fix this by enabling the feature by default for stacking
drivers and clearing it for low-level drivers. Incorporating a device
that does not support dzd will then cause the feature to be disabled in
the stacking driver.

Also ensure that the maximum discard value does not overflow when
exported in sysfs and return 0 in the alignment and dzd fields for
devices that don't support discard.

Reported-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-settings.c   | 3 ++-
 block/blk-sysfs.c      | 3 ++-
 include/linux/blkdev.h | 7 +++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index cd3c428e194f..fa1eb0449a05 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = -1;
+	lim->discard_zeroes_data = 1;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 
 	blk_set_default_limits(&q->limits);
 	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+	q->limits.discard_zeroes_data = 0;
 
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6d735122bc59..53bd0c77bfda 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.max_discard_sectors << 9, page);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9f921bf4bf8c..520d8618ed76 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -257,7 +257,7 @@ struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		cluster;
-	signed char		discard_zeroes_data;
+	unsigned char		discard_zeroes_data;
 };
 
 struct request_queue
@@ -1069,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 {
 	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
 
+	if (!lim->max_discard_sectors)
+		return 0;
+
 	return (lim->discard_granularity + lim->discard_alignment - alignment)
 		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
-	if (q->limits.discard_zeroes_data == 1)
+	if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
 		return 1;
 
 	return 0;
-- 
cgit