diff options
author | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
commit | cc9b94029e9ef51787af908e9856b1eed314bc00 (patch) | |
tree | 9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /fs/btrfs/extent_io.c | |
parent | 13bed58ce8748d430a26e353a09b89f9d613a71f (diff) | |
parent | 1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff) |
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 406 |
1 files changed, 252 insertions, 154 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3cd57825c75f..66a755150056 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -20,6 +20,7 @@ #include "locking.h" #include "rcu-string.h" #include "backref.h" +#include "transaction.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -74,8 +75,7 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); - printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu " - "refs %d\n", + pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); @@ -163,13 +163,13 @@ int __init extent_io_init(void) { extent_state_cache = kmem_cache_create("btrfs_extent_state", sizeof(struct extent_state), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!extent_state_cache) return -ENOMEM; extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer", sizeof(struct extent_buffer), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + SLAB_MEM_SPREAD, NULL); if (!extent_buffer_cache) goto free_state_cache; @@ -460,8 +460,7 @@ static int insert_state(struct extent_io_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk(KERN_ERR "BTRFS: found node %llu %llu on insert of " - "%llu %llu\n", + pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n", found->start, found->end, start, end); return -EEXIST; } @@ -572,9 +571,8 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - btrfs_panic(tree_fs_info(tree), err, "Locking error: " - "Extent tree was modified by another " - "thread while locked."); + btrfs_panic(tree_fs_info(tree), err, + "Locking error: Extent tree was modified by another thread while locked."); } /* @@ -1729,7 +1727,7 @@ out_failed: } void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, - struct page *locked_page, + u64 delalloc_end, struct page *locked_page, unsigned clear_bits, unsigned long page_ops) { @@ -2025,9 +2023,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, bio->bi_iter.bi_size = 0; map_length = length; + /* + * Avoid races with device replace and make sure our bbio has devices + * associated to its stripes that don't go away while we are doing the + * read repair operation. + */ + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_block(fs_info, WRITE, logical, &map_length, &bbio, mirror_num); if (ret) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } @@ -2037,14 +2042,17 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, dev = bbio->stripes[mirror_num-1].dev; btrfs_put_bbio(bbio); if (!dev || !dev->bdev || !dev->writeable) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } bio->bi_bdev = dev->bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); bio_add_page(bio, page, length, pg_offset); - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { + if (btrfsic_submit_bio_wait(bio)) { /* try to remap that extent elsewhere? */ + btrfs_bio_counter_dec(fs_info); bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; @@ -2054,6 +2062,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, "read error corrected: ino %llu off %llu (dev %s sector %llu)", btrfs_ino(inode), start, rcu_str_deref(dev->name), sector); + btrfs_bio_counter_dec(fs_info); bio_put(bio); return 0; } @@ -2111,8 +2120,9 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, if (failrec->in_validation) { /* there was no real error, just free the record */ - pr_debug("clean_io_failure: freeing dummy error at %llu\n", - failrec->start); + btrfs_debug(fs_info, + "clean_io_failure: freeing dummy error at %llu", + failrec->start); goto out; } if (fs_info->sb->s_flags & MS_RDONLY) @@ -2178,6 +2188,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct io_failure_record *failrec; struct extent_map *em; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; @@ -2225,8 +2236,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, em->compress_type); } - pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n", - logical, start, failrec->len); + btrfs_debug(fs_info, + "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu", + logical, start, failrec->len); failrec->logical = logical; free_extent_map(em); @@ -2244,9 +2256,10 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, return ret; } } else { - pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", - failrec->logical, failrec->start, failrec->len, - failrec->in_validation); + btrfs_debug(fs_info, + "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d", + failrec->logical, failrec->start, failrec->len, + failrec->in_validation); /* * when data can be on disk more than twice, add to failrec here * (e.g. with a list for failed_mirror) to make @@ -2262,18 +2275,19 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec, int failed_mirror) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int num_copies; - num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, - failrec->logical, failrec->len); + num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies == 1) { /* * we only have a single copy of the data, so don't bother with * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. */ - pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -2312,8 +2326,9 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, } if (failrec->this_mirror > num_copies) { - pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", - num_copies, failrec->this_mirror, failed_mirror); + btrfs_debug(fs_info, + "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d", + num_copies, failrec->this_mirror, failed_mirror); return 0; } @@ -2376,7 +2391,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, int read_mode; int ret; - BUG_ON(failed_bio->bi_rw & REQ_WRITE); + BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) @@ -2402,12 +2417,13 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, free_io_failure(inode, failrec); return -EIO; } + bio_set_op_attrs(bio, REQ_OP_READ, read_mode); - pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n", - read_mode, failrec->this_mirror, failrec->in_validation); + btrfs_debug(btrfs_sb(inode->i_sb), + "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", + read_mode, failrec->this_mirror, failrec->in_validation); - ret = tree->ops->submit_bio_hook(inode, read_mode, bio, - failrec->this_mirror, + ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); if (ret) { free_io_failure(inode, failrec); @@ -2473,8 +2489,7 @@ static void end_bio_extent_writepage(struct bio *bio) bvec->bv_offset, bvec->bv_len); else btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, - "incomplete page write in btrfs with offset %u and " - "length %u", + "incomplete page write in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); } @@ -2530,10 +2545,12 @@ static void end_bio_extent_readpage(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " - "mirror=%u\n", (u64)bio->bi_iter.bi_sector, - bio->bi_error, io_bio->mirror_num); + btrfs_debug(fs_info, + "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", + (u64)bio->bi_iter.bi_sector, bio->bi_error, + io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; /* We always issue full-page reads, but if some block @@ -2543,13 +2560,12 @@ static void end_bio_extent_readpage(struct bio *bio) * if they don't add up to a full page. */ if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) { if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE) - btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info, - "partial page read in btrfs with offset %u and length %u", + btrfs_err(fs_info, + "partial page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); else - btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info, - "incomplete page read in btrfs with offset %u and " - "length %u", + btrfs_info(fs_info, + "incomplete page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); } @@ -2686,12 +2702,6 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) btrfs_bio->csum = NULL; btrfs_bio->csum_allocated = NULL; btrfs_bio->end_io = NULL; - -#ifdef CONFIG_BLK_CGROUP - /* FIXME, put this into bio_clone_bioset */ - if (bio->bi_css) - bio_associate_blkcg(new, bio->bi_css); -#endif } return new; } @@ -2713,8 +2723,8 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) } -static int __must_check submit_one_bio(int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) +static int __must_check submit_one_bio(struct bio *bio, int mirror_num, + unsigned long bio_flags) { int ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -2725,33 +2735,31 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, start = page_offset(page) + bvec->bv_offset; bio->bi_private = NULL; - bio_get(bio); if (tree->ops && tree->ops->submit_bio_hook) - ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, + ret = tree->ops->submit_bio_hook(page->mapping->host, bio, mirror_num, bio_flags, start); else - btrfsic_submit_bio(rw, bio); + btrfsic_submit_bio(bio); bio_put(bio); return ret; } -static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page, +static int merge_bio(struct extent_io_tree *tree, struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags) { int ret = 0; if (tree->ops && tree->ops->merge_bio_hook) - ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio, + ret = tree->ops->merge_bio_hook(page, offset, size, bio, bio_flags); - BUG_ON(ret < 0); return ret; } -static int submit_extent_page(int rw, struct extent_io_tree *tree, +static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, struct writeback_control *wbc, struct page *page, sector_t sector, size_t size, unsigned long offset, @@ -2779,10 +2787,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (prev_bio_flags != bio_flags || !contig || force_bio_submit || - merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || + merge_bio(tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { - ret = submit_one_bio(rw, bio, mirror_num, - prev_bio_flags); + ret = submit_one_bio(bio, mirror_num, prev_bio_flags); if (ret < 0) { *bio_ret = NULL; return ret; @@ -2803,6 +2810,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; + bio_set_op_attrs(bio, op, op_flags); if (wbc) { wbc_init_bio(wbc, bio); wbc_account_io(wbc, page, page_size); @@ -2811,7 +2819,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (bio_ret) *bio_ret = bio; else - ret = submit_one_bio(rw, bio, mirror_num, bio_flags); + ret = submit_one_bio(bio, mirror_num, bio_flags); return ret; } @@ -2869,13 +2877,14 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, * into the tree that are removed when the IO is done (by the end_io * handlers) * XXX JDM: This needs looking at to ensure proper page locking + * return 0 on success, otherwise return error */ static int __do_readpage(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, int read_flags, u64 *prev_em_start) { struct inode *inode = page->mapping->host; @@ -2890,7 +2899,7 @@ static int __do_readpage(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; - int ret; + int ret = 0; int nr = 0; size_t pg_offset = 0; size_t iosize; @@ -3058,8 +3067,8 @@ static int __do_readpage(struct extent_io_tree *tree, } pnr -= page->index; - ret = submit_extent_page(rw, tree, NULL, page, - sector, disk_io_size, pg_offset, + ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL, + page, sector, disk_io_size, pg_offset, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, @@ -3071,6 +3080,7 @@ static int __do_readpage(struct extent_io_tree *tree, } else { SetPageError(page); unlock_extent(tree, cur, cur + iosize - 1); + goto out; } cur = cur + iosize; pg_offset += iosize; @@ -3081,7 +3091,7 @@ out: SetPageUptodate(page); unlock_page(page); } - return 0; + return ret; } static inline void __do_contiguous_readpages(struct extent_io_tree *tree, @@ -3090,7 +3100,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, u64 *prev_em_start) { struct inode *inode; @@ -3111,7 +3121,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw, prev_em_start); + mirror_num, bio_flags, 0, prev_em_start); put_page(pages[index]); } } @@ -3121,7 +3131,7 @@ static void __extent_readpages(struct extent_io_tree *tree, int nr_pages, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, u64 *prev_em_start) { u64 start = 0; @@ -3143,7 +3153,7 @@ static void __extent_readpages(struct extent_io_tree *tree, index - first_index, start, end, get_extent, em_cached, bio, mirror_num, bio_flags, - rw, prev_em_start); + prev_em_start); start = page_start; end = start + PAGE_SIZE - 1; first_index = index; @@ -3154,7 +3164,7 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, bio, - mirror_num, bio_flags, rw, + mirror_num, bio_flags, prev_em_start); } @@ -3162,7 +3172,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int read_flags) { struct inode *inode = page->mapping->host; struct btrfs_ordered_extent *ordered; @@ -3182,7 +3192,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw, NULL); + bio_flags, read_flags, NULL); return ret; } @@ -3194,9 +3204,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, int ret; ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, 0); if (bio) - ret = submit_one_bio(READ, bio, mirror_num, bio_flags); + ret = submit_one_bio(bio, mirror_num, bio_flags); return ret; } @@ -3430,8 +3440,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, page->index, cur, end); } - ret = submit_extent_page(write_flags, tree, wbc, page, - sector, iosize, pg_offset, + ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, + page, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, 0, 0, 0, false); @@ -3470,13 +3480,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, size_t pg_offset = 0; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_SHIFT; - int write_flags; + int write_flags = 0; unsigned long nr_written = 0; if (wbc->sync_mode == WB_SYNC_ALL) write_flags = WRITE_SYNC; - else - write_flags = WRITE; trace___extent_writepage(page, inode, wbc); @@ -3621,7 +3629,6 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; - struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode); SetPageError(page); if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) @@ -3667,13 +3674,13 @@ static void set_btree_ioerr(struct page *page) */ switch (eb->log_index) { case -1: - set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags); break; case 0: - set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags); break; case 1: - set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags); break; default: BUG(); /* unexpected, logic error */ @@ -3718,9 +3725,11 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, struct block_device *bdev = fs_info->fs_devices->latest_bdev; struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; u64 offset = eb->start; + u32 nritems; unsigned long i, num_pages; unsigned long bio_flags = 0; - int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; + unsigned long start, end; + int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); @@ -3729,14 +3738,32 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) bio_flags = EXTENT_BIO_TREE_LOG; + /* set btree blocks beyond nritems with 0 to avoid stale content. */ + nritems = btrfs_header_nritems(eb); + if (btrfs_header_level(eb) > 0) { + end = btrfs_node_key_ptr_offset(nritems); + + memset_extent_buffer(eb, 0, end, eb->len - end); + } else { + /* + * leaf: + * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 + */ + start = btrfs_item_nr_offset(nritems); + end = btrfs_leaf_data(eb) + + leaf_data_end(fs_info->tree_root, eb); + memset_extent_buffer(eb, 0, start, end - start); + } + for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; clear_page_dirty_for_io(p); set_page_writeback(p); - ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, - PAGE_SIZE, 0, bdev, &epd->bio, - -1, end_bio_extent_buffer_writepage, + ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, + p, offset >> 9, PAGE_SIZE, 0, bdev, + &epd->bio, -1, + end_bio_extent_buffer_writepage, 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { @@ -4046,13 +4073,12 @@ retry: static void flush_epd_write_bio(struct extent_page_data *epd) { if (epd->bio) { - int rw = WRITE; int ret; - if (epd->sync_io) - rw = WRITE_SYNC; + bio_set_op_attrs(epd->bio, REQ_OP_WRITE, + epd->sync_io ? WRITE_SYNC : 0); - ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags); + ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ epd->bio = NULL; } @@ -4170,7 +4196,8 @@ int extent_readpages(struct extent_io_tree *tree, prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, - page->index, GFP_NOFS)) { + page->index, + readahead_gfp_mask(mapping))) { put_page(page); continue; } @@ -4179,19 +4206,19 @@ int extent_readpages(struct extent_io_tree *tree, if (nr < ARRAY_SIZE(pagepool)) continue; __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ, &prev_em_start); + &bio, 0, &bio_flags, &prev_em_start); nr = 0; } if (nr) __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ, &prev_em_start); + &bio, 0, &bio_flags, &prev_em_start); if (em_cached) free_extent_map(em_cached); BUG_ON(!list_empty(pages)); if (bio) - return submit_one_bio(READ, bio, 0, bio_flags); + return submit_one_bio(bio, 0, bio_flags); return 0; } @@ -4483,21 +4510,36 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= (FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); } else if (fieinfo->fi_extents_max) { + struct btrfs_trans_handle *trans; + u64 bytenr = em->block_start - (em->start - em->orig_start); disko = em->block_start + offset_in_extent; /* + * We need a trans handle to get delayed refs + */ + trans = btrfs_join_transaction(root); + /* + * It's OK if we can't start a trans we can still check + * from commit_root + */ + if (IS_ERR(trans)) + trans = NULL; + + /* * As btrfs supports shared space, this information * can be exported to userspace tools via * flag FIEMAP_EXTENT_SHARED. If fi_extents_max == 0 * then we're just getting a count and we can skip the * lookup stuff. */ - ret = btrfs_check_shared(NULL, root->fs_info, + ret = btrfs_check_shared(trans, root->fs_info, root->objectid, btrfs_ino(inode), bytenr); + if (trans) + btrfs_end_transaction(trans, root); if (ret < 0) goto out_free; if (ret) @@ -4718,16 +4760,16 @@ err: } struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { unsigned long len; if (!fs_info) { /* * Called only from tests that don't always have a fs_info - * available, but we know that nodesize is 4096 + * available */ - len = 4096; + len = nodesize; } else { len = fs_info->tree_root->nodesize; } @@ -4823,7 +4865,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { struct extent_buffer *eb, *exists = NULL; int ret; @@ -4831,7 +4873,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, eb = find_extent_buffer(fs_info, start); if (eb) return eb; - eb = alloc_dummy_extent_buffer(fs_info, start); + eb = alloc_dummy_extent_buffer(fs_info, start, nodesize); if (!eb) return NULL; eb->fs_info = fs_info; @@ -4882,18 +4924,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int uptodate = 1; int ret; + if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) { + btrfs_err(fs_info, "bad tree block start %llu", start); + return ERR_PTR(-EINVAL); + } + eb = find_extent_buffer(fs_info, start); if (eb) return eb; eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); - if (!p) + if (!p) { + exists = ERR_PTR(-ENOMEM); goto free_eb; + } spin_lock(&mapping->private_lock); if (PagePrivate(p)) { @@ -4938,8 +4987,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, @@ -5160,11 +5211,10 @@ int extent_buffer_uptodate(struct extent_buffer *eb) } int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, u64 start, int wait, + struct extent_buffer *eb, int wait, get_extent_t *get_extent, int mirror_num) { unsigned long i; - unsigned long start_i; struct page *page; int err; int ret = 0; @@ -5178,16 +5228,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_SHIFT) - - (eb->start >> PAGE_SHIFT); - } else { - start_i = 0; - } - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; if (wait == WAIT_NONE) { if (!trylock_page(page)) @@ -5196,38 +5238,62 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } locked_pages++; + } + /* + * We need to firstly lock all pages to make sure that + * the uptodate bit of our pages won't be affected by + * clear_extent_buffer_uptodate(). + */ + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; if (!PageUptodate(page)) { num_reads++; all_uptodate = 0; } } + if (all_uptodate) { - if (start_i == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); goto unlock_exit; } clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; + if (!PageUptodate(page)) { + if (ret) { + atomic_dec(&eb->io_pages); + unlock_page(page); + continue; + } + ClearPageError(page); err = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, &bio_flags, - READ | REQ_META); - if (err) + REQ_META); + if (err) { ret = err; + /* + * We use &bio in above __extent_read_full_page, + * so we ensure that if it returns error, the + * current page fails to add itself to bio and + * it's been unlocked. + * + * We must dec io_pages by ourselves. + */ + atomic_dec(&eb->io_pages); + } } else { unlock_page(page); } } if (bio) { - err = submit_one_bio(READ | REQ_META, bio, mirror_num, - bio_flags); + err = submit_one_bio(bio, mirror_num, bio_flags); if (err) return err; } @@ -5235,7 +5301,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (ret || wait != WAIT_COMPLETE) return ret; - for (i = start_i; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page = eb->pages[i]; wait_on_page_locked(page); if (!PageUptodate(page)) @@ -5245,12 +5311,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, return ret; unlock_exit: - i = start_i; while (locked_pages > 0) { - page = eb->pages[i]; - i++; - unlock_page(page); locked_pages--; + page = eb->pages[locked_pages]; + unlock_page(page); } return ret; } @@ -5323,6 +5387,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, return ret; } +/* + * return 0 if the item is found within a page. + * return 1 if the item spans two pages. + * return -EINVAL otherwise. + */ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, @@ -5337,7 +5406,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, PAGE_SHIFT; if (i != end_i) - return -EINVAL; + return 1; if (i == 0) { offset = start_offset; @@ -5348,8 +5417,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, } if (start + min_len > eb->len) { - WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " - "wanted %lu %lu\n", + WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); return -EINVAL; } @@ -5490,17 +5558,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, } } -/* - * The extent buffer bitmap operations are done with byte granularity because - * bitmap items are not guaranteed to be aligned to a word and therefore a - * single word in a bitmap may straddle two pages in the extent buffer. - */ -#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE) -#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1) -#define BITMAP_FIRST_BYTE_MASK(start) \ - ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK) -#define BITMAP_LAST_BYTE_MASK(nbits) \ - (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) +void le_bitmap_set(u8 *map, unsigned int start, int len) +{ + u8 *p = map + BIT_BYTE(start); + const unsigned int size = start + len; + int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); + u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); + + while (len - bits_to_set >= 0) { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_BYTE; + mask_to_set = ~(u8)0; + p++; + } + if (len) { + mask_to_set &= BITMAP_LAST_BYTE_MASK(size); + *p |= mask_to_set; + } +} + +void le_bitmap_clear(u8 *map, unsigned int start, int len) +{ + u8 *p = map + BIT_BYTE(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE); + u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_BYTE; + mask_to_clear = ~(u8)0; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_BYTE_MASK(size); + *p &= ~mask_to_clear; + } +} /* * eb_bitmap_offset() - calculate the page and offset of the byte containing the @@ -5544,7 +5640,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb, int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, unsigned long nr) { - char *kaddr; + u8 *kaddr; struct page *page; unsigned long i; size_t offset; @@ -5566,13 +5662,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len) { - char *kaddr; + u8 *kaddr; struct page *page; unsigned long i; size_t offset; const unsigned int size = pos + len; int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE); - unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos); + u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos); eb_bitmap_offset(eb, start, pos, &i, &offset); page = eb->pages[i]; @@ -5583,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, kaddr[offset] |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_BYTE; - mask_to_set = ~0U; + mask_to_set = ~(u8)0; if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; @@ -5608,13 +5704,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len) { - char *kaddr; + u8 *kaddr; struct page *page; unsigned long i; size_t offset; const unsigned int size = pos + len; int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE); - unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos); + u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos); eb_bitmap_offset(eb, start, pos, &i, &offset); page = eb->pages[i]; @@ -5625,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, kaddr[offset] &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_BYTE; - mask_to_clear = ~0U; + mask_to_clear = ~(u8)0; if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; @@ -5679,14 +5775,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, if (src_offset + len > dst->len) { btrfs_err(dst->fs_info, - "memmove bogus src_offset %lu move " - "len %lu dst len %lu", src_offset, len, dst->len); + "memmove bogus src_offset %lu move len %lu dst len %lu", + src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { btrfs_err(dst->fs_info, - "memmove bogus dst_offset %lu move " - "len %lu dst len %lu", dst_offset, len, dst->len); + "memmove bogus dst_offset %lu move len %lu dst len %lu", + dst_offset, len, dst->len); BUG_ON(1); } @@ -5726,13 +5822,15 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move " - "len %lu len %lu", src_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus src_offset %lu move len %lu len %lu", + src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move " - "len %lu len %lu", dst_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus dst_offset %lu move len %lu len %lu", + dst_offset, len, dst->len); BUG_ON(1); } if (dst_offset < src_offset) { |