Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--	fs/btrfs/extent_io.c	| 234
1 file changed, 149 insertions(+), 85 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 44fe66b53c8b..66a755150056 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,6 +20,7 @@
 #include "locking.h"
 #include "rcu-string.h"
 #include "backref.h"
+#include "transaction.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -74,8 +75,7 @@ void btrfs_leak_debug_check(void)
 
 	while (!list_empty(&buffers)) {
 		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-		printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
-		       "refs %d\n",
+		pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
 		       eb->start, eb->len, atomic_read(&eb->refs));
 		list_del(&eb->leak_list);
 		kmem_cache_free(extent_buffer_cache, eb);
@@ -460,8 +460,7 @@ static int insert_state(struct extent_io_tree *tree,
 	if (node) {
 		struct extent_state *found;
 		found = rb_entry(node, struct extent_state, rb_node);
-		printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
-		       "%llu %llu\n",
+		pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
 		       found->start, found->end, start, end);
 		return -EEXIST;
 	}
@@ -572,9 +571,8 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
 
 static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
 {
-	btrfs_panic(tree_fs_info(tree), err, "Locking error: "
-		    "Extent tree was modified by another "
-		    "thread while locked.");
+	btrfs_panic(tree_fs_info(tree), err,
+		    "Locking error: Extent tree was modified by another thread while locked.");
 }
 
 /*
@@ -1729,7 +1727,7 @@ out_failed:
 }
 
 void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
-				 struct page *locked_page,
+				 u64 delalloc_end, struct page *locked_page,
 				 unsigned clear_bits,
 				 unsigned long page_ops)
 {
@@ -2122,8 +2120,9 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
 
 	if (failrec->in_validation) {
 		/* there was no real error, just free the record */
-		pr_debug("clean_io_failure: freeing dummy error at %llu\n",
-			 failrec->start);
+		btrfs_debug(fs_info,
+			"clean_io_failure: freeing dummy error at %llu",
+			failrec->start);
 		goto out;
 	}
 	if (fs_info->sb->s_flags & MS_RDONLY)
@@ -2189,6 +2188,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 		struct io_failure_record **failrec_ret)
 {
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct io_failure_record *failrec;
 	struct extent_map *em;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -2236,8 +2236,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 						 em->compress_type);
 		}
 
-		pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
-			 logical, start, failrec->len);
+		btrfs_debug(fs_info,
+			"Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
+			logical, start, failrec->len);
 		failrec->logical = logical;
 		free_extent_map(em);
 
@@ -2255,9 +2256,10 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 			return ret;
 		}
 	} else {
-		pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
-			 failrec->logical, failrec->start, failrec->len,
-			 failrec->in_validation);
+		btrfs_debug(fs_info,
+			"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
+			failrec->logical, failrec->start, failrec->len,
+			failrec->in_validation);
 		/*
 		 * when data can be on disk more than twice, add to failrec here
 		 * (e.g. with a list for failed_mirror) to make
@@ -2273,18 +2275,19 @@
 int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
 			   struct io_failure_record *failrec, int failed_mirror)
 {
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	int num_copies;
 
-	num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
-				      failrec->logical, failrec->len);
+	num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
 	if (num_copies == 1) {
 		/*
 		 * we only have a single copy of the data, so don't bother with
 		 * all the retry and error correction code that follows. no
 		 * matter what the error is, it is very likely to persist.
 		 */
-		pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
-			 num_copies, failrec->this_mirror, failed_mirror);
+		btrfs_debug(fs_info,
+			"Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
+			num_copies, failrec->this_mirror, failed_mirror);
 		return 0;
 	}
 
@@ -2323,8 +2326,9 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
 	}
 
 	if (failrec->this_mirror > num_copies) {
-		pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
-			 num_copies, failrec->this_mirror, failed_mirror);
+		btrfs_debug(fs_info,
+			"Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
+			num_copies, failrec->this_mirror, failed_mirror);
 		return 0;
 	}
 
@@ -2415,8 +2419,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	}
 	bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
 
-	pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
-		 read_mode, failrec->this_mirror, failrec->in_validation);
+	btrfs_debug(btrfs_sb(inode->i_sb),
+		"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
+		read_mode, failrec->this_mirror, failrec->in_validation);
 
 	ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
 					 failrec->bio_flags, 0);
@@ -2484,8 +2489,7 @@ static void end_bio_extent_writepage(struct bio *bio)
 					bvec->bv_offset, bvec->bv_len);
 			else
 				btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
-				   "incomplete page write in btrfs with offset %u and "
-				   "length %u",
+				   "incomplete page write in btrfs with offset %u and length %u",
 					bvec->bv_offset, bvec->bv_len);
 		}
 
@@ -2541,10 +2545,12 @@ static void end_bio_extent_readpage(struct bio *bio)
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
+		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 
-		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-			 "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
-			 bio->bi_error, io_bio->mirror_num);
+		btrfs_debug(fs_info,
+			"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
+			(u64)bio->bi_iter.bi_sector, bio->bi_error,
+			io_bio->mirror_num);
 		tree = &BTRFS_I(inode)->io_tree;
 
 		/* We always issue full-page reads, but if some block
@@ -2554,13 +2560,12 @@ static void end_bio_extent_readpage(struct bio *bio)
 		 * if they don't add up to a full page.
 		 */
 		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
 			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
-				btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
-				   "partial page read in btrfs with offset %u and length %u",
+				btrfs_err(fs_info,
+					"partial page read in btrfs with offset %u and length %u",
 					bvec->bv_offset, bvec->bv_len);
 			else
-				btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
-				   "incomplete page read in btrfs with offset %u and "
-				   "length %u",
+				btrfs_info(fs_info,
+					"incomplete page read in btrfs with offset %u and length %u",
 					bvec->bv_offset, bvec->bv_len);
 		}
@@ -3624,7 +3629,6 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
 static void set_btree_ioerr(struct page *page)
 {
 	struct extent_buffer *eb = (struct extent_buffer *)page->private;
-	struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode);
 
 	SetPageError(page);
 	if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
@@ -3670,13 +3674,13 @@ static void set_btree_ioerr(struct page *page)
 	 */
 	switch (eb->log_index) {
 	case -1:
-		set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags);
+		set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
 		break;
 	case 0:
-		set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
+		set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
 		break;
 	case 1:
-		set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
+		set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
 		break;
 	default:
 		BUG(); /* unexpected, logic error */
@@ -3721,8 +3725,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	struct block_device *bdev = fs_info->fs_devices->latest_bdev;
 	struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
 	u64 offset = eb->start;
+	u32 nritems;
 	unsigned long i, num_pages;
 	unsigned long bio_flags = 0;
+	unsigned long start, end;
 	int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
 	int ret = 0;
 
@@ -3732,6 +3738,23 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
 		bio_flags = EXTENT_BIO_TREE_LOG;
 
+	/* set btree blocks beyond nritems with 0 to avoid stale content. */
+	nritems = btrfs_header_nritems(eb);
+	if (btrfs_header_level(eb) > 0) {
+		end = btrfs_node_key_ptr_offset(nritems);
+
+		memset_extent_buffer(eb, 0, end, eb->len - end);
+	} else {
+		/*
+		 * leaf:
+		 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
+		 */
+		start = btrfs_item_nr_offset(nritems);
+		end = btrfs_leaf_data(eb) +
+		      leaf_data_end(fs_info->tree_root, eb);
+		memset_extent_buffer(eb, 0, start, end - start);
+	}
+
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = eb->pages[i];
 
@@ -4487,21 +4510,36 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			flags |= (FIEMAP_EXTENT_DELALLOC |
 				  FIEMAP_EXTENT_UNKNOWN);
 		} else if (fieinfo->fi_extents_max) {
+			struct btrfs_trans_handle *trans;
+
 			u64 bytenr = em->block_start -
 				(em->start - em->orig_start);
 
 			disko = em->block_start + offset_in_extent;
 
 			/*
+			 * We need a trans handle to get delayed refs
+			 */
+			trans = btrfs_join_transaction(root);
+			/*
+			 * It's OK if we can't start a trans we can still check
+			 * from commit_root
+			 */
+			if (IS_ERR(trans))
+				trans = NULL;
+
+			/*
 			 * As btrfs supports shared space, this information
 			 * can be exported to userspace tools via
 			 * flag FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0
 			 * then we're just getting a count and we can skip the
 			 * lookup stuff.
 			 */
-			ret = btrfs_check_shared(NULL, root->fs_info,
+			ret = btrfs_check_shared(trans, root->fs_info,
 						 root->objectid,
 						 btrfs_ino(inode), bytenr);
+			if (trans)
+				btrfs_end_transaction(trans, root);
 			if (ret < 0)
 				goto out_free;
 			if (ret)
@@ -5173,11 +5211,10 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
 }
 
 int read_extent_buffer_pages(struct extent_io_tree *tree,
-			     struct extent_buffer *eb, u64 start, int wait,
+			     struct extent_buffer *eb, int wait,
 			     get_extent_t *get_extent, int mirror_num)
 {
 	unsigned long i;
-	unsigned long start_i;
 	struct page *page;
 	int err;
 	int ret = 0;
@@ -5191,16 +5228,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
 
-	if (start) {
-		WARN_ON(start < eb->start);
-		start_i = (start >> PAGE_SHIFT) -
-			(eb->start >> PAGE_SHIFT);
-	} else {
-		start_i = 0;
-	}
-
 	num_pages = num_extent_pages(eb->start, eb->len);
-	for (i = start_i; i < num_pages; i++) {
+	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
 		if (wait == WAIT_NONE) {
 			if (!trylock_page(page))
@@ -5209,21 +5238,29 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 			lock_page(page);
 		}
 		locked_pages++;
+	}
+	/*
+	 * We need to firstly lock all pages to make sure that
+	 * the uptodate bit of our pages won't be affected by
+	 * clear_extent_buffer_uptodate().
+	 */
+	for (i = 0; i < num_pages; i++) {
+		page = eb->pages[i];
 		if (!PageUptodate(page)) {
 			num_reads++;
 			all_uptodate = 0;
 		}
 	}
+
 	if (all_uptodate) {
-		if (start_i == 0)
-			set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 		goto unlock_exit;
 	}
 
 	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
 	eb->read_mirror = 0;
 	atomic_set(&eb->io_pages, num_reads);
-	for (i = start_i; i < num_pages; i++) {
+	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
 
 		if (!PageUptodate(page)) {
@@ -5264,7 +5301,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	if (ret || wait != WAIT_COMPLETE)
 		return ret;
 
-	for (i = start_i; i < num_pages; i++) {
+	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
 		wait_on_page_locked(page);
 		if (!PageUptodate(page))
@@ -5274,12 +5311,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	return ret;
 
 unlock_exit:
-	i = start_i;
 	while (locked_pages > 0) {
-		page = eb->pages[i];
-		i++;
-		unlock_page(page);
 		locked_pages--;
+		page = eb->pages[locked_pages];
+		unlock_page(page);
 	}
 	return ret;
 }
@@ -5382,8 +5417,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 	}
 
 	if (start + min_len > eb->len) {
-		WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
-		       "wanted %lu %lu\n",
+		WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
 		       eb->start, eb->len, start, min_len);
 		return -EINVAL;
 	}
@@ -5524,17 +5558,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 	}
 }
 
-/*
- * The extent buffer bitmap operations are done with byte granularity because
- * bitmap items are not guaranteed to be aligned to a word and therefore a
- * single word in a bitmap may straddle two pages in the extent buffer.
- */
-#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
-#define BITMAP_FIRST_BYTE_MASK(start) \
-	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
-#define BITMAP_LAST_BYTE_MASK(nbits) \
-	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+void le_bitmap_set(u8 *map, unsigned int start, int len)
+{
+	u8 *p = map + BIT_BYTE(start);
+	const unsigned int size = start + len;
+	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+	while (len - bits_to_set >= 0) {
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_BYTE;
+		mask_to_set = ~(u8)0;
+		p++;
+	}
+	if (len) {
+		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+
+void le_bitmap_clear(u8 *map, unsigned int start, int len)
+{
+	u8 *p = map + BIT_BYTE(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_BYTE;
+		mask_to_clear = ~(u8)0;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
 
 /*
  * eb_bitmap_offset() - calculate the page and offset of the byte containing the
@@ -5578,7 +5640,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
 			   unsigned long nr)
 {
-	char *kaddr;
+	u8 *kaddr;
 	struct page *page;
 	unsigned long i;
 	size_t offset;
@@ -5600,13 +5662,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 			      unsigned long pos, unsigned long len)
 {
-	char *kaddr;
+	u8 *kaddr;
 	struct page *page;
 	unsigned long i;
 	size_t offset;
 	const unsigned int size = pos + len;
 	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-	unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
 
 	eb_bitmap_offset(eb, start, pos, &i, &offset);
 	page = eb->pages[i];
@@ -5617,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 		kaddr[offset] |= mask_to_set;
 		len -= bits_to_set;
 		bits_to_set = BITS_PER_BYTE;
-		mask_to_set = ~0U;
+		mask_to_set = ~(u8)0;
 		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
@@ -5642,13 +5704,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
 				unsigned long pos, unsigned long len)
 {
-	char *kaddr;
+	u8 *kaddr;
 	struct page *page;
 	unsigned long i;
 	size_t offset;
 	const unsigned int size = pos + len;
 	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-	unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
 
 	eb_bitmap_offset(eb, start, pos, &i, &offset);
 	page = eb->pages[i];
@@ -5659,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
 		kaddr[offset] &= ~mask_to_clear;
 		len -= bits_to_clear;
 		bits_to_clear = BITS_PER_BYTE;
-		mask_to_clear = ~0U;
+		mask_to_clear = ~(u8)0;
 		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
@@ -5713,14 +5775,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 
 	if (src_offset + len > dst->len) {
 		btrfs_err(dst->fs_info,
-			"memmove bogus src_offset %lu move "
-		       "len %lu dst len %lu", src_offset, len, dst->len);
+			"memmove bogus src_offset %lu move len %lu dst len %lu",
+			 src_offset, len, dst->len);
 		BUG_ON(1);
 	}
 	if (dst_offset + len > dst->len) {
 		btrfs_err(dst->fs_info,
-			"memmove bogus dst_offset %lu move "
-		       "len %lu dst len %lu", dst_offset, len, dst->len);
+			"memmove bogus dst_offset %lu move len %lu dst len %lu",
+			 dst_offset, len, dst->len);
 		BUG_ON(1);
 	}
 
@@ -5760,13 +5822,15 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	unsigned long src_i;
 
 	if (src_offset + len > dst->len) {
-		btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move "
-		       "len %lu len %lu", src_offset, len, dst->len);
+		btrfs_err(dst->fs_info,
+			  "memmove bogus src_offset %lu move len %lu len %lu",
+			  src_offset, len, dst->len);
 		BUG_ON(1);
 	}
 	if (dst_offset + len > dst->len) {
-		btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move "
-		       "len %lu len %lu", dst_offset, len, dst->len);
+		btrfs_err(dst->fs_info,
+			  "memmove bogus dst_offset %lu move len %lu len %lu",
+			  dst_offset, len, dst->len);
 		BUG_ON(1);
 	}
 	if (dst_offset < src_offset) {
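For reference, here is a minimal userspace sketch of the byte-granularity bitmap helper the diff adds (le_bitmap_set()). The helper bodies above still use BIT_BYTE() and the BITMAP_*_BYTE_MASK macros that this diff removes from extent_io.c, so those are presumably provided by a header outside this diff; they are redefined below only so the snippet compiles on its own. The main() harness and the expected output comment are illustrative and not part of the patch.

```c
#include <stdio.h>
#include <stdint.h>

/* Redefinitions for this standalone sketch only. */
#define BITS_PER_BYTE 8
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
#define BITMAP_FIRST_BYTE_MASK(start) \
	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
#define BITMAP_LAST_BYTE_MASK(nbits) \
	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))

typedef uint8_t u8;

/* Same body as the kernel helper added by the patch: works one byte at a
 * time, so results do not depend on host word size or endianness. */
static void le_bitmap_set(u8 *map, unsigned int start, int len)
{
	u8 *p = map + BIT_BYTE(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~(u8)0;
		p++;
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

int main(void)
{
	u8 map[4] = { 0 };
	int i;

	/* Set 13 bits starting at bit 3: the run crosses a byte boundary. */
	le_bitmap_set(map, 3, 13);
	for (i = 0; i < 4; i++)
		printf("byte %d: 0x%02x\n", i, map[i]);
	/* Expected: 0xf8 0xff 0x00 0x00 (bits 3..15 set). */
	return 0;
}
```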