From 68154e90c9d1492d570671ae181d9a8f8530da55 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 25 Apr 2008 12:47:50 +0200
Subject: block: add dma alignment and padding support to blk_rq_map_kern

This patch adds bio_copy_kern similar to
bio_copy_user. blk_rq_map_kern uses bio_copy_kern instead of
bio_map_kern if necessary.

bio_copy_kern uses temporary pages and the bi_end_io callback frees
these pages. bio_copy_kern saves the original kernel buffer at
bio->bi_private it doesn't use something like struct bio_map_data to
store the information about the caller.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bio.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index d259690863fb..61c15eaf3fb3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -324,6 +324,8 @@ extern struct bio *bio_map_user_iov(struct request_queue *,
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
 				gfp_t);
+extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
+				 gfp_t, int);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
-- 
cgit 


From 75ad23bc0fcb4f992a5d06982bf0857ab1738e9e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 29 Apr 2008 14:48:33 +0200
Subject: block: make queue flags non-atomic

We can save some atomic ops in the IO path, if we clearly define
the rules of how to modify the queue flags.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c                  | 39 ++++++++++++++++++++++++++-------------
 block/blk-merge.c                 |  6 +++---
 block/blk-settings.c              |  2 +-
 block/blk-tag.c                   |  8 ++++----
 block/elevator.c                  | 13 ++++++++++---
 drivers/block/loop.c              |  2 +-
 drivers/block/ub.c                |  2 +-
 drivers/md/dm-table.c             |  7 +++++--
 drivers/md/md.c                   |  3 ++-
 drivers/scsi/scsi_debug.c         |  2 +-
 drivers/scsi/scsi_lib.c           | 31 ++++++++++++++++++-------------
 drivers/scsi/scsi_transport_sas.c |  3 +--
 include/linux/blkdev.h            | 33 +++++++++++++++++++++++++++++----
 13 files changed, 102 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index e447799256d6..d2f23ec5ebfa 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -198,7 +198,8 @@ void blk_plug_device(struct request_queue *q)
 	if (blk_queue_stopped(q))
 		return;
 
-	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
+	if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
+		__set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
 		blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
 	}
@@ -213,9 +214,10 @@ int blk_remove_plug(struct request_queue *q)
 {
 	WARN_ON(!irqs_disabled());
 
-	if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+	if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
 		return 0;
 
+	queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
 	del_timer(&q->unplug_timer);
 	return 1;
 }
@@ -311,15 +313,16 @@ void blk_start_queue(struct request_queue *q)
 {
 	WARN_ON(!irqs_disabled());
 
-	clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
+	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 
 	/*
 	 * one level of recursion is ok and is much faster than kicking
 	 * the unplug handling
 	 */
-	if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+	if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+		queue_flag_set(QUEUE_FLAG_REENTER, q);
 		q->request_fn(q);
-		clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
 	} else {
 		blk_plug_device(q);
 		kblockd_schedule_work(&q->unplug_work);
@@ -344,7 +347,7 @@ EXPORT_SYMBOL(blk_start_queue);
 void blk_stop_queue(struct request_queue *q)
 {
 	blk_remove_plug(q);
-	set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
+	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
 
@@ -373,11 +376,8 @@ EXPORT_SYMBOL(blk_sync_queue);
  * blk_run_queue - run a single device queue
  * @q:	The queue to run
  */
-void blk_run_queue(struct request_queue *q)
+void __blk_run_queue(struct request_queue *q)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
 	blk_remove_plug(q);
 
 	/*
@@ -385,15 +385,28 @@ void blk_run_queue(struct request_queue *q)
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
 	if (!elv_queue_empty(q)) {
-		if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+		if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+			queue_flag_set(QUEUE_FLAG_REENTER, q);
 			q->request_fn(q);
-			clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+			queue_flag_clear(QUEUE_FLAG_REENTER, q);
 		} else {
 			blk_plug_device(q);
 			kblockd_schedule_work(&q->unplug_work);
 		}
 	}
+}
+EXPORT_SYMBOL(__blk_run_queue);
 
+/**
+ * blk_run_queue - run a single device queue
+ * @q: The queue to run
+ */
+void blk_run_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -406,7 +419,7 @@ void blk_put_queue(struct request_queue *q)
 void blk_cleanup_queue(struct request_queue *q)
 {
 	mutex_lock(&q->sysfs_lock);
-	set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);
 
 	if (q->elevator)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b5c5c4a9e3f0..73b23562af20 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -55,7 +55,7 @@ void blk_recalc_rq_segments(struct request *rq)
 	if (!rq->bio)
 		return;
 
-	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
+	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 	hw_seg_size = seg_size = 0;
 	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
 	rq_for_each_segment(bv, rq, iter) {
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(blk_recount_segments);
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
-	if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
+	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 		return 0;
 
 	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
@@ -175,7 +175,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	int nsegs, cluster;
 
 	nsegs = 0;
-	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
+	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 
 	/*
 	 * for each bio in rq
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 77b51dc37a3c..6089384ab064 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -287,7 +287,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 	t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
 	t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
 	if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-		clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 4780a46ce234..e176ddbe599e 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q)
 	__blk_free_tags(bqt);
 
 	q->queue_tags = NULL;
-	q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
+	queue_flag_clear(QUEUE_FLAG_QUEUED, q);
 }
 
 /**
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags);
  **/
 void blk_queue_free_tags(struct request_queue *q)
 {
-	clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+	queue_flag_clear(QUEUE_FLAG_QUEUED, q);
 }
 EXPORT_SYMBOL(blk_queue_free_tags);
 
@@ -188,7 +188,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
 		rc = blk_queue_resize_tags(q, depth);
 		if (rc)
 			return rc;
-		set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+		queue_flag_set(QUEUE_FLAG_QUEUED, q);
 		return 0;
 	} else
 		atomic_inc(&tags->refcnt);
@@ -197,7 +197,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
 	 * assign it, all done
 	 */
 	q->queue_tags = tags;
-	q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
+	queue_flag_set(QUEUE_FLAG_QUEUED, q);
 	INIT_LIST_HEAD(&q->tag_busy_list);
 	return 0;
 fail:
diff --git a/block/elevator.c b/block/elevator.c
index 88318c383608..e8a90fe23424 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1070,7 +1070,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 */
 	spin_lock_irq(q->queue_lock);
 
-	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
 
 	elv_drain_elevator(q);
 
@@ -1104,7 +1104,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 * finally exit old elevator and turn off BYPASS.
 	 */
 	elevator_exit(old_elevator);
-	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	spin_lock_irq(q->queue_lock);
+	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+	spin_unlock_irq(q->queue_lock);
+
 	return 1;
 
 fail_register:
@@ -1115,7 +1118,11 @@ fail_register:
 	elevator_exit(e);
 	q->elevator = old_elevator;
 	elv_register_queue(q);
-	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+
+	spin_lock_irq(q->queue_lock);
+	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+	spin_unlock_irq(q->queue_lock);
+
 	return 0;
 }
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f7f163557aa0..d3a25b027ff9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -546,7 +546,7 @@ static void loop_unplug(struct request_queue *q)
 {
 	struct loop_device *lo = q->queuedata;
 
-	clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+	queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
 	blk_run_address_space(lo->lo_backing_file->f_mapping);
 }
 
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 27bfe72aab59..e322cce8c12d 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -2399,7 +2399,7 @@ static void ub_disconnect(struct usb_interface *intf)
 		del_gendisk(lun->disk);
 		/*
 		 * I wish I could do:
-		 *    set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+		 *    queue_flag_set(QUEUE_FLAG_DEAD, q);
 		 * As it is, we rely on our internal poisoning and let
 		 * the upper levels to spin furiously failing all the I/O.
 		 */
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 51be53344214..73326e7c54bf 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -873,10 +873,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	q->max_hw_sectors = t->limits.max_hw_sectors;
 	q->seg_boundary_mask = t->limits.seg_boundary_mask;
 	q->bounce_pfn = t->limits.bounce_pfn;
+	/* XXX: the below will probably go bug. must ensure there can be no
+	 * concurrency on queue_flags, and use the unlocked versions...
+	 */
 	if (t->limits.no_cluster)
-		q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_clear(QUEUE_FLAG_CLUSTER, q);
 	else
-		q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_set(QUEUE_FLAG_CLUSTER, q);
 
 }
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 87620b705bee..acd716b657b8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -282,7 +282,8 @@ static mddev_t * mddev_find(dev_t unit)
 		kfree(new);
 		return NULL;
 	}
-	set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+	/* Can be unlocked because the queue is new: no concurrency */
+	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
 
 	blk_queue_make_request(new->queue, md_fail_request);
 
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 07103c399fe0..f6600bfb5bde 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1773,7 +1773,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp)
 	if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
 		printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n",
 		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
-	set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags);
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue);
 	return 0;
 }
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 67f412bb4974..d545ad1cf47a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -536,6 +536,9 @@ static void scsi_run_queue(struct request_queue *q)
 	       !shost->host_blocked && !shost->host_self_blocked &&
 		!((shost->can_queue > 0) &&
 		  (shost->host_busy >= shost->can_queue))) {
+
+		int flagset;
+
 		/*
 		 * As long as shost is accepting commands and we have
 		 * starved queues, call blk_run_queue. scsi_request_fn
@@ -549,19 +552,20 @@ static void scsi_run_queue(struct request_queue *q)
 		sdev = list_entry(shost->starved_list.next,
 					  struct scsi_device, starved_entry);
 		list_del_init(&sdev->starved_entry);
-		spin_unlock_irqrestore(shost->host_lock, flags);
-
+		spin_unlock(shost->host_lock);
+
+		spin_lock(sdev->request_queue->queue_lock);
+		flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
+				!test_bit(QUEUE_FLAG_REENTER,
+					&sdev->request_queue->queue_flags);
+		if (flagset)
+			queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
+		__blk_run_queue(sdev->request_queue);
+		if (flagset)
+			queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
+		spin_unlock(sdev->request_queue->queue_lock);
 
-		if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-		    !test_and_set_bit(QUEUE_FLAG_REENTER,
-				      &sdev->request_queue->queue_flags)) {
-			blk_run_queue(sdev->request_queue);
-			clear_bit(QUEUE_FLAG_REENTER,
-				  &sdev->request_queue->queue_flags);
-		} else
-			blk_run_queue(sdev->request_queue);
-
-		spin_lock_irqsave(shost->host_lock, flags);
+		spin_lock(shost->host_lock);
 		if (unlikely(!list_empty(&sdev->starved_entry)))
 			/*
 			 * sdev lost a race, and was put back on the
@@ -1585,8 +1589,9 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 
 	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
+	/* New queue, no concurrency on queue_flags */
 	if (!shost->use_clustering)
-		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 
 	/*
 	 * set a reasonable default alignment on word boundaries: the
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 7899e3dda9bf..f4461d35ffb9 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -248,8 +248,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 	else
 		q->queuedata = shost;
 
-	set_bit(QUEUE_FLAG_BIDI, &q->queue_flags);
-
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
 	return 0;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c5065e3d2ca9..8ca481cd7d73 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -408,6 +408,30 @@ struct request_queue
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
 
+static inline void queue_flag_set_unlocked(unsigned int flag,
+					   struct request_queue *q)
+{
+	__set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+	WARN_ON_ONCE(!spin_is_locked(q->queue_lock));
+	__set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear_unlocked(unsigned int flag,
+					     struct request_queue *q)
+{
+	__clear_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+	WARN_ON_ONCE(!spin_is_locked(q->queue_lock));
+	__clear_bit(flag, &q->queue_flags);
+}
+
 enum {
 	/*
 	 * Hardbarrier is supported with one of the following methods.
@@ -496,17 +520,17 @@ static inline int blk_queue_full(struct request_queue *q, int rw)
 static inline void blk_set_queue_full(struct request_queue *q, int rw)
 {
 	if (rw == READ)
-		set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+		queue_flag_set(QUEUE_FLAG_READFULL, q);
 	else
-		set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+		queue_flag_set(QUEUE_FLAG_WRITEFULL, q);
 }
 
 static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 {
 	if (rw == READ)
-		clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_READFULL, q);
 	else
-		clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_WRITEFULL, q);
 }
 
 
@@ -626,6 +650,7 @@ extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
+extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_start_queueing(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long);
-- 
cgit 


From 2a4aa30c5f967eb6ae874c67fa6fceeee84815f9 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 29 Apr 2008 09:54:36 +0200
Subject: block: rename and export rq_init()

This rename rq_init() blk_rq_init() and export it. Any path that hands
the request to the block layer needs to call it to initialize the
request.

This is a preparation for large command support, which needs to
initialize the request in a proper way (that is, just doing a memset()
will not work).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 4 ++--
 block/blk-core.c       | 5 +++--
 block/blk.h            | 1 -
 include/linux/blkdev.h | 1 +
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 47127ba09e45..66e55288178c 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -143,7 +143,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
 		end_io = post_flush_end_io;
 	}
 
-	rq_init(q, rq);
+	blk_rq_init(q, rq);
 	rq->cmd_flags = REQ_HARDBARRIER;
 	rq->rq_disk = q->bar_rq.rq_disk;
 	rq->end_io = end_io;
@@ -165,7 +165,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 	blkdev_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = &q->bar_rq;
-	rq_init(q, rq);
+	blk_rq_init(q, rq);
 	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
 		rq->cmd_flags |= REQ_RW;
 	if (q->ordered & QUEUE_ORDERED_FUA)
diff --git a/block/blk-core.c b/block/blk-core.c
index d2f23ec5ebfa..fe0d1390b743 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -107,7 +107,7 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 
-void rq_init(struct request_queue *q, struct request *rq)
+void blk_rq_init(struct request_queue *q, struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));
 
@@ -120,6 +120,7 @@ void rq_init(struct request_queue *q, struct request *rq)
 	rq->tag = -1;
 	rq->ref_count = 1;
 }
+EXPORT_SYMBOL(blk_rq_init);
 
 static void req_bio_endio(struct request *rq, struct bio *bio,
 			  unsigned int nbytes, int error)
@@ -598,7 +599,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
 	if (!rq)
 		return NULL;
 
-	rq_init(q, rq);
+	blk_rq_init(q, rq);
 
 	/*
 	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
diff --git a/block/blk.h b/block/blk.h
index ec9120fb789a..59776ab4742a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -10,7 +10,6 @@
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kobj_type blk_queue_ktype;
 
-void rq_init(struct request_queue *q, struct request *rq);
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8ca481cd7d73..d17032c347c0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -607,6 +607,7 @@ extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
 extern void generic_make_request(struct bio *bio);
+extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern void blk_end_sync_rq(struct request *rq, int error);
-- 
cgit 


From d7e3c3249ef23b4617393c69fe464765b4ff1645 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 29 Apr 2008 09:54:39 +0200
Subject: block: add large command support

This patch changes rq->cmd from the static array to a pointer to
support large commands.

We rarely handle large commands. So for optimization, a struct request
still has a static array for a command. rq_init sets rq->cmd pointer
to the static array.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 1 +
 include/linux/blkdev.h | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index e6fdb288be65..5d09f8c56024 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -117,6 +117,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
+	rq->cmd = rq->__cmd;
 	rq->tag = -1;
 	rq->ref_count = 1;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d17032c347c0..08df1ea8bac4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -215,8 +215,9 @@ struct request {
 	/*
 	 * when request is used as a packet command carrier
 	 */
-	unsigned int cmd_len;
-	unsigned char cmd[BLK_MAX_CDB];
+	unsigned short cmd_len;
+	unsigned char __cmd[BLK_MAX_CDB];
+	unsigned char *cmd;
 
 	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
-- 
cgit 


From ac9fafa1243640349aa481adf473db283a695766 Mon Sep 17 00:00:00 2001
From: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Date: Tue, 29 Apr 2008 14:44:19 +0200
Subject: block: Skip I/O merges when disabled

The block I/O + elevator + I/O scheduler code spend a lot of time trying
to merge I/Os -- rightfully so under "normal" circumstances. However,
if one were to know that the incoming I/O stream was /very/ random in
nature, the cycles are wasted.

This patch adds a per-request_queue tunable that (when set) disables
merge attempts (beyond the simple one-hit cache check), thus freeing up
a non-trivial amount of CPU cycles.

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-sysfs.c      | 26 ++++++++++++++++++++++++++
 block/elevator.c       |  3 +++
 include/linux/blkdev.h |  2 ++
 3 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index fc41d83be22b..e85c4013e8a2 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -135,6 +135,25 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
+static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_nomerges(q), page);
+}
+
+static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
+				    size_t count)
+{
+	unsigned long nm;
+	ssize_t ret = queue_var_store(&nm, page, count);
+
+	if (nm)
+	       set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+	else
+	       clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+
+	return ret;
+}
+
 
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -170,6 +189,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.show = queue_hw_sector_size_show,
 };
 
+static struct queue_sysfs_entry queue_nomerges_entry = {
+	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_nomerges_show,
+	.store = queue_nomerges_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -177,6 +202,7 @@ static struct attribute *default_attrs[] = {
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_nomerges_entry.attr,
 	NULL,
 };
 
diff --git a/block/elevator.c b/block/elevator.c
index 7253fa05db0a..ac5310ef8270 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -488,6 +488,9 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		}
 	}
 
+	if (blk_queue_nomerges(q))
+		return ELEVATOR_NO_MERGE;
+
 	/*
 	 * See if our hash lookup can find a potential backmerge.
 	 */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 08df1ea8bac4..c09696a90d6a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -408,6 +408,7 @@ struct request_queue
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */
 
 static inline void queue_flag_set_unlocked(unsigned int flag,
 					   struct request_queue *q)
@@ -476,6 +477,7 @@ enum {
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
-- 
cgit