From b2bf96833c5782befc3e7700f791fde754a47b01 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Feb 2009 08:50:26 +0100 Subject: block: fix bogus gcc warning for uninitialized var usage Newer gcc throw this warning: fs/bio.c: In function ?bio_alloc_bioset?: fs/bio.c:305: warning: ?p? may be used uninitialized in this function since it cannot figure out that 'p' is only ever used if 'bs' is non-NULL. Signed-off-by: Jens Axboe --- fs/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bio.c b/fs/bio.c index 72ab251cdb9c..124b95c4d582 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -302,7 +302,7 @@ void bio_init(struct bio *bio) struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = NULL; - void *p; + void *uninitialized_var(p); if (bs) { p = mempool_alloc(bs->bio_pool, gfp_mask); -- cgit From 9e8c0bccdc944bd09361672d47660810c027bcaa Mon Sep 17 00:00:00 2001 From: Márton Németh Date: Fri, 20 Feb 2009 08:12:51 +0100 Subject: block: add documentation for register_blkdev() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for register_blkdev() function and for the parameters. Signed-off-by: Márton Németh Cc: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- block/genhd.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index e1eadcc9546a..a9ec910974c1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -256,6 +256,22 @@ void blkdev_show(struct seq_file *seqf, off_t offset) } #endif /* CONFIG_PROC_FS */ +/** + * register_blkdev - register a new block device + * + * @major: the requested major device number [1..255]. If @major=0, try to + * allocate any unused major number. + * @name: the name of the new block device as a zero terminated string + * + * The @name must be unique within the system. + * + * The return value depends on the @major input parameter. + * - if a major device number was requested in range [1..255] then the + * function returns zero on success, or a negative error code + * - if any unused major number was requested with @major=0 parameter + * then the return value is the allocated major number in range + * [1..255] or a negative error code otherwise + */ int register_blkdev(unsigned int major, const char *name) { struct blk_major_name **n, *p; -- cgit From 5e4c91c84b194b26cf592779e451f4b5be777cba Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Feb 2009 08:53:35 +0100 Subject: cciss: shorten 30s timeout on controller reset If reset_devices is set for kexec, then cciss will delay 30 seconds since the old 5i controller _may_ need that long to recover. Replace the long sleep with incremental sleep and tests to reduce the 30 seconds to worst case for 5i, so that other controllers will proceed quickly. Reviewed-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d2cb67b61176..b5a061114630 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3611,11 +3611,15 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, schedule_timeout_uninterruptible(30*HZ); /* Now try to get the controller to respond to a no-op */ - for (i=0; i<12; i++) { + for (i=0; i<30; i++) { if (cciss_noop(pdev) == 0) break; - else - printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : "")); + + schedule_timeout_uninterruptible(HZ); + } + if (i == 30) { + printk(KERN_ERR "cciss: controller seems dead\n"); + return -EBUSY; } } -- cgit From 1e42807918d17e8c93bf14fbb74be84b141334c1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Feb 2009 09:03:10 +0100 Subject: block: reduce stack footprint of blk_recount_segments() blk_recalc_rq_segments() requires a request structure passed in, which we don't have from blk_recount_segments(). So the latter allocates one on the stack, using > 400 bytes of stack for that. This can cause us to spill over one page of stack from ext4 at least: 0) 4560 400 blk_recount_segments+0x43/0x62 1) 4160 32 bio_phys_segments+0x1c/0x24 2) 4128 32 blk_rq_bio_prep+0x2a/0xf9 3) 4096 32 init_request_from_bio+0xf9/0xfe 4) 4064 112 __make_request+0x33c/0x3f6 5) 3952 144 generic_make_request+0x2d1/0x321 6) 3808 64 submit_bio+0xb9/0xc3 7) 3744 48 submit_bh+0xea/0x10e 8) 3696 368 ext4_mb_init_cache+0x257/0xa6a [ext4] 9) 3328 288 ext4_mb_regular_allocator+0x421/0xcd9 [ext4] 10) 3040 160 ext4_mb_new_blocks+0x211/0x4b4 [ext4] 11) 2880 336 ext4_ext_get_blocks+0xb61/0xd45 [ext4] 12) 2544 96 ext4_get_blocks_wrap+0xf2/0x200 [ext4] 13) 2448 80 ext4_da_get_block_write+0x6e/0x16b [ext4] 14) 2368 352 mpage_da_map_blocks+0x7e/0x4b3 [ext4] 15) 2016 352 ext4_da_writepages+0x2ce/0x43c [ext4] 16) 1664 32 do_writepages+0x2d/0x3c 17) 1632 144 __writeback_single_inode+0x162/0x2cd 18) 1488 96 generic_sync_sb_inodes+0x1e3/0x32b 19) 1392 16 sync_sb_inodes+0xe/0x10 20) 1376 48 writeback_inodes+0x69/0xb3 21) 1328 208 balance_dirty_pages_ratelimited_nr+0x187/0x2f9 22) 1120 224 generic_file_buffered_write+0x1d4/0x2c4 23) 896 176 __generic_file_aio_write_nolock+0x35f/0x393 24) 720 80 generic_file_aio_write+0x6c/0xc8 25) 640 80 ext4_file_write+0xa9/0x137 [ext4] 26) 560 320 do_sync_write+0xf0/0x137 27) 240 48 vfs_write+0xb3/0x13c 28) 192 64 sys_write+0x4c/0x74 29) 128 128 system_call_fastpath+0x16/0x1b Split the segment counting out into a __blk_recalc_rq_segments() helper to avoid allocating an onstack request just for checking the physical segment count. Signed-off-by: Jens Axboe --- block/blk-merge.c | 94 ++++++++++++++++++++++++++++---------------------- include/linux/blkdev.h | 2 ++ 2 files changed, 55 insertions(+), 41 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index b92f5b0866b0..a104593e70c3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -38,72 +38,84 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) } } -void blk_recalc_rq_segments(struct request *rq) +static unsigned int __blk_recalc_rq_segments(struct request_queue *q, + struct bio *bio, + unsigned int *seg_size_ptr) { - int nr_phys_segs; unsigned int phys_size; struct bio_vec *bv, *bvprv = NULL; - int seg_size; - int cluster; - struct req_iterator iter; - int high, highprv = 1; - struct request_queue *q = rq->q; + int cluster, i, high, highprv = 1; + unsigned int seg_size, nr_phys_segs; + struct bio *fbio; - if (!rq->bio) - return; + if (!bio) + return 0; + fbio = bio; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); seg_size = 0; phys_size = nr_phys_segs = 0; - rq_for_each_segment(bv, rq, iter) { - /* - * the trick here is making sure that a high page is never - * considered part of another segment, since that might - * change with the bounce page. - */ - high = page_to_pfn(bv->bv_page) > q->bounce_pfn; - if (high || highprv) - goto new_segment; - if (cluster) { - if (seg_size + bv->bv_len > q->max_segment_size) - goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + for_each_bio(bio) { + bio_for_each_segment(bv, bio, i) { + /* + * the trick here is making sure that a high page is + * never considered part of another segment, since that + * might change with the bounce page. + */ + high = page_to_pfn(bv->bv_page) > q->bounce_pfn; + if (high || highprv) goto new_segment; + if (cluster) { + if (seg_size + bv->bv_len > q->max_segment_size) + goto new_segment; + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) + goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + goto new_segment; + + seg_size += bv->bv_len; + bvprv = bv; + continue; + } +new_segment: + if (nr_phys_segs == 1 && seg_size > + fbio->bi_seg_front_size) + fbio->bi_seg_front_size = seg_size; - seg_size += bv->bv_len; + nr_phys_segs++; bvprv = bv; - continue; + seg_size = bv->bv_len; + highprv = high; } -new_segment: - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) - rq->bio->bi_seg_front_size = seg_size; - - nr_phys_segs++; - bvprv = bv; - seg_size = bv->bv_len; - highprv = high; } - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) + if (seg_size_ptr) + *seg_size_ptr = seg_size; + + return nr_phys_segs; +} + +void blk_recalc_rq_segments(struct request *rq) +{ + unsigned int seg_size = 0, phys_segs; + + phys_segs = __blk_recalc_rq_segments(rq->q, rq->bio, &seg_size); + + if (phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) rq->bio->bi_seg_front_size = seg_size; if (seg_size > rq->biotail->bi_seg_back_size) rq->biotail->bi_seg_back_size = seg_size; - rq->nr_phys_segments = nr_phys_segs; + rq->nr_phys_segments = phys_segs; } void blk_recount_segments(struct request_queue *q, struct bio *bio) { - struct request rq; struct bio *nxt = bio->bi_next; - rq.q = q; - rq.bio = rq.biotail = bio; + bio->bi_next = NULL; - blk_recalc_rq_segments(&rq); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, NULL); bio->bi_next = nxt; - bio->bi_phys_segments = rq.nr_phys_segments; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dcaa0fd84b02..465d6babc847 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -708,6 +708,8 @@ struct req_iterator { }; /* This should not be used directly - use rq_for_each_segment */ +#define for_each_bio(_bio) \ + for (; _bio; _bio = _bio->bi_next) #define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) -- cgit From 9e973e64ac6dc504e6447d52193d4fff1a670156 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Feb 2009 08:10:09 +0100 Subject: xen/blkfront: use blk_rq_map_sg to generate ring entries On occasion, the request will apparently have more segments than we fit into the ring. Jens says: > The second problem is that the block layer then appears to create one > too many segments, but from the dump it has rq->nr_phys_segments == > BLKIF_MAX_SEGMENTS_PER_REQUEST. I suspect the latter is due to > xen-blkfront not handling the merging on its own. It should check that > the new page doesn't form part of the previous page. The > rq_for_each_segment() iterates all single bits in the request, not dma > segments. The "easiest" way to do this is to call blk_rq_map_sg() and > then iterate the mapped sg list. That will give you what you are > looking for. > Here's a test patch, compiles but otherwise untested. I spent more > time figuring out how to enable XEN than to code it up, so YMMV! > Probably the sg list wants to be put inside the ring and only > initialized on allocation, then you can get rid of the sg on stack and > sg_init_table() loop call in the function. I'll leave that, and the > testing, to you. [Moved sg array into info structure, and initialize once. -J] Signed-off-by: Jens Axboe Signed-off-by: Jeremy Fitzhardinge --- drivers/block/xen-blkfront.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 918ef725de41..b6c8ce254359 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -82,6 +83,7 @@ struct blkfront_info enum blkif_state connected; int ring_ref; struct blkif_front_ring ring; + struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; @@ -204,12 +206,11 @@ static int blkif_queue_request(struct request *req) struct blkfront_info *info = req->rq_disk->private_data; unsigned long buffer_mfn; struct blkif_request *ring_req; - struct req_iterator iter; - struct bio_vec *bvec; unsigned long id; unsigned int fsect, lsect; - int ref; + int i, ref; grant_ref_t gref_head; + struct scatterlist *sg; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; @@ -238,12 +239,13 @@ static int blkif_queue_request(struct request *req) if (blk_barrier_rq(req)) ring_req->operation = BLKIF_OP_WRITE_BARRIER; - ring_req->nr_segments = 0; - rq_for_each_segment(bvec, req, iter) { - BUG_ON(ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST); - buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); - fsect = bvec->bv_offset >> 9; - lsect = fsect + (bvec->bv_len >> 9) - 1; + ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); + BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + + for_each_sg(info->sg, sg, ring_req->nr_segments, i) { + buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); + fsect = sg->offset >> 9; + lsect = fsect + (sg->length >> 9) - 1; /* install a grant reference. */ ref = gnttab_claim_grant_reference(&gref_head); BUG_ON(ref == -ENOSPC); @@ -254,16 +256,12 @@ static int blkif_queue_request(struct request *req) buffer_mfn, rq_data_dir(req) ); - info->shadow[id].frame[ring_req->nr_segments] = - mfn_to_pfn(buffer_mfn); - - ring_req->seg[ring_req->nr_segments] = + info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); + ring_req->seg[i] = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; - - ring_req->nr_segments++; } info->ring.req_prod_pvt++; @@ -622,6 +620,8 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); -- cgit