Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--  fs/btrfs/scrub.c  230
1 file changed, 99 insertions, 131 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 8f6ceea33969..2e9a322773f2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -39,21 +39,20 @@ struct scrub_block;
 struct scrub_ctx;
 
 /*
- * the following three values only influence the performance.
+ * The following three values only influence the performance.
+ *
  * The last one configures the number of parallel and outstanding I/O
- * operations. The first two values configure an upper limit for the number
+ * operations. The first one configures an upper limit for the number
  * of (dynamically allocated) pages that are added to a bio.
  */
-#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
-#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
-#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */
+#define SCRUB_PAGES_PER_BIO	32	/* 128KiB per bio for x86 */
+#define SCRUB_BIOS_PER_SCTX	64	/* 8MiB per device in flight for x86 */
 
 /*
- * the following value times PAGE_SIZE needs to be large enough to match the
+ * The following value times PAGE_SIZE needs to be large enough to match the
  * largest node/leaf/sector size that shall be supported.
- * Values larger than BTRFS_STRIPE_LEN are not supported.
  */
-#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
+#define SCRUB_MAX_PAGES_PER_BLOCK	(BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)
 
 struct scrub_recover {
 	refcount_t		refs;
@@ -88,11 +87,7 @@ struct scrub_bio {
 	blk_status_t		status;
 	u64			logical;
 	u64			physical;
-#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
-	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
-#else
-	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
-#endif
+	struct scrub_page	*pagev[SCRUB_PAGES_PER_BIO];
 	int			page_count;
 	int			next_free;
 	struct btrfs_work	work;
@@ -163,7 +158,7 @@ struct scrub_ctx {
 	struct list_head	csum_list;
 	atomic_t		cancel_req;
 	int			readonly;
-	int			pages_per_rd_bio;
+	int			pages_per_bio;
 
 	/* State of IO submission throttling affecting the associated device */
 	ktime_t			throttle_deadline;
@@ -174,7 +169,6 @@ struct scrub_ctx {
 	struct scrub_bio        *wr_curr_bio;
 	struct mutex            wr_lock;
-	int                     pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
 	struct btrfs_device     *wr_tgtdev;
 	bool                    flush_all_writes;
@@ -578,7 +572,7 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 		goto nomem;
 	refcount_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
-	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
+	sctx->pages_per_bio = SCRUB_PAGES_PER_BIO;
 	sctx->curr = -1;
 	sctx->fs_info = fs_info;
 	INIT_LIST_HEAD(&sctx->csum_list);
@@ -616,7 +610,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 	sctx->wr_curr_bio = NULL;
 	if (is_dev_replace) {
 		WARN_ON(!fs_info->dev_replace.tgtdev);
-		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
 		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
 		sctx->flush_all_writes = false;
 	}
@@ -758,7 +751,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 
 	eb = path->nodes[0];
 	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
-	item_size = btrfs_item_size_nr(eb, path->slots[0]);
+	item_size = btrfs_item_size(eb, path->slots[0]);
 
 	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 		do {
@@ -852,8 +845,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	have_csum = sblock_to_check->pagev[0]->have_csum;
 	dev = sblock_to_check->pagev[0]->dev;
-	if (btrfs_is_zoned(fs_info) && !sctx->is_dev_replace)
-		return btrfs_repair_one_zone(fs_info, logical);
+	if (!sctx->is_dev_replace && btrfs_repair_one_zone(fs_info, logical))
+		return 0;
 
 	/*
 	 * We must use GFP_NOFS because the scrub task might be waiting for a
@@ -1313,7 +1306,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 		recover->bioc = bioc;
 		recover->map_length = mapped_length;
 
-		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		ASSERT(page_index < SCRUB_MAX_PAGES_PER_BLOCK);
 
 		nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);
@@ -1675,7 +1668,7 @@ again:
 		sbio->dev = sctx->wr_tgtdev;
 		bio = sbio->bio;
 		if (!bio) {
-			bio = btrfs_bio_alloc(sctx->pages_per_wr_bio);
+			bio = btrfs_bio_alloc(sctx->pages_per_bio);
 			sbio->bio = bio;
 		}
@@ -1708,7 +1701,7 @@ again:
 	sbio->pagev[sbio->page_count] = spage;
 	scrub_page_get(spage);
 	sbio->page_count++;
-	if (sbio->page_count == sctx->pages_per_wr_bio)
+	if (sbio->page_count == sctx->pages_per_bio)
 		scrub_wr_submit(sctx);
 	mutex_unlock(&sctx->wr_lock);
@@ -1755,7 +1748,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
 	struct scrub_ctx *sctx = sbio->sctx;
 	int i;
 
-	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
+	ASSERT(sbio->page_count <= SCRUB_PAGES_PER_BIO);
 	if (sbio->status) {
 		struct btrfs_dev_replace *dev_replace =
 			&sbio->sctx->fs_info->dev_replace;
@@ -2101,7 +2094,7 @@ again:
 		sbio->dev = spage->dev;
 		bio = sbio->bio;
 		if (!bio) {
-			bio = btrfs_bio_alloc(sctx->pages_per_rd_bio);
+			bio = btrfs_bio_alloc(sctx->pages_per_bio);
 			sbio->bio = bio;
 		}
@@ -2135,7 +2128,7 @@ again:
 	scrub_block_get(sblock); /* one for the page added to the bio */
 	atomic_inc(&sblock->outstanding_pages);
 	sbio->page_count++;
-	if (sbio->page_count == sctx->pages_per_rd_bio)
+	if (sbio->page_count == sctx->pages_per_bio)
 		scrub_submit(sctx);
 
 	return 0;
@@ -2297,7 +2290,7 @@ leave_nomem:
 			scrub_block_put(sblock);
 			return -ENOMEM;
 		}
-		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		ASSERT(index < SCRUB_MAX_PAGES_PER_BLOCK);
 		scrub_page_get(spage);
 		sblock->pagev[index] = spage;
 		spage->sblock = sblock;
@@ -2369,7 +2362,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
 	struct scrub_ctx *sctx = sbio->sctx;
 	int i;
 
-	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
+	ASSERT(sbio->page_count <= SCRUB_PAGES_PER_BIO);
 	if (sbio->status) {
 		for (i = 0; i < sbio->page_count; i++) {
 			struct scrub_page *spage = sbio->pagev[i];
@@ -2631,7 +2624,7 @@ leave_nomem:
 			scrub_block_put(sblock);
 			return -ENOMEM;
 		}
-		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		ASSERT(index < SCRUB_MAX_PAGES_PER_BLOCK);
 		/* For scrub block */
 		scrub_page_get(spage);
 		sblock->pagev[index] = spage;
@@ -2892,15 +2885,15 @@ static void scrub_parity_put(struct scrub_parity *sparity)
 static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 						  struct map_lookup *map,
 						  struct btrfs_device *sdev,
-						  struct btrfs_path *path,
 						  u64 logic_start,
 						  u64 logic_end)
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	struct btrfs_root *root = fs_info->extent_root;
-	struct btrfs_root *csum_root = fs_info->csum_root;
+	struct btrfs_root *root = btrfs_extent_root(fs_info, logic_start);
+	struct btrfs_root *csum_root;
 	struct btrfs_extent_item *extent;
 	struct btrfs_io_context *bioc = NULL;
+	struct btrfs_path *path;
 	u64 flags;
 	int ret;
 	int slot;
@@ -2919,6 +2912,16 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	int extent_mirror_num;
 	int stop_loop = 0;
 
+	path = btrfs_alloc_path();
+	if (!path) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return -ENOMEM;
+	}
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+
 	ASSERT(map->stripe_len <= U32_MAX);
 	nsectors = map->stripe_len >> fs_info->sectorsize_bits;
 	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
@@ -2928,6 +2931,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.malloc_errors++;
 		spin_unlock(&sctx->stat_lock);
+		btrfs_free_path(path);
 		return -ENOMEM;
 	}
@@ -3060,6 +3064,7 @@ again:
 			extent_dev = bioc->stripes[0].dev;
 			btrfs_put_bioc(bioc);
 
+			csum_root = btrfs_csum_root(fs_info, extent_logical);
 			ret = btrfs_lookup_csums_range(csum_root,
 						extent_logical,
 						extent_logical + extent_len - 1,
@@ -3116,7 +3121,7 @@ out:
 	scrub_wr_submit(sctx);
 	mutex_unlock(&sctx->wr_lock);
 
-	btrfs_release_path(path);
+	btrfs_free_path(path);
 	return ret < 0 ? ret : 0;
 }
@@ -3161,17 +3166,18 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
 }
 
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
+					   struct btrfs_block_group *bg,
 					   struct map_lookup *map,
 					   struct btrfs_device *scrub_dev,
-					   int num, u64 base, u64 length,
-					   struct btrfs_block_group *cache)
+					   int stripe_index, u64 dev_extent_len)
 {
-	struct btrfs_path *path, *ppath;
+	struct btrfs_path *path;
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	struct btrfs_root *root = fs_info->extent_root;
-	struct btrfs_root *csum_root = fs_info->csum_root;
+	struct btrfs_root *root;
+	struct btrfs_root *csum_root;
 	struct btrfs_extent_item *extent;
 	struct blk_plug plug;
+	const u64 chunk_logical = bg->start;
 	u64 flags;
 	int ret;
 	int slot;
@@ -3183,10 +3189,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 physical_end;
 	u64 generation;
 	int mirror_num;
-	struct reada_control *reada1;
-	struct reada_control *reada2;
 	struct btrfs_key key;
-	struct btrfs_key key_end;
 	u64 increment = map->stripe_len;
 	u64 offset;
 	u64 extent_logical;
@@ -3202,25 +3205,26 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	int extent_mirror_num;
 	int stop_loop = 0;
 
-	physical = map->stripes[num].physical;
+	physical = map->stripes[stripe_index].physical;
 	offset = 0;
-	nstripes = div64_u64(length, map->stripe_len);
+	nstripes = div64_u64(dev_extent_len, map->stripe_len);
 	mirror_num = 1;
 	increment = map->stripe_len;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-		offset = map->stripe_len * num;
+		offset = map->stripe_len * stripe_index;
 		increment = map->stripe_len * map->num_stripes;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 		int factor = map->num_stripes / map->sub_stripes;
-		offset = map->stripe_len * (num / map->sub_stripes);
+		offset = map->stripe_len * (stripe_index / map->sub_stripes);
 		increment = map->stripe_len * factor;
-		mirror_num = num % map->sub_stripes + 1;
+		mirror_num = stripe_index % map->sub_stripes + 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
-		mirror_num = num % map->num_stripes + 1;
+		mirror_num = stripe_index % map->num_stripes + 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		mirror_num = num % map->num_stripes + 1;
+		mirror_num = stripe_index % map->num_stripes + 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-		get_raid56_logic_offset(physical, num, map, &offset, NULL);
+		get_raid56_logic_offset(physical, stripe_index, map, &offset,
+					NULL);
 		increment = map->stripe_len * nr_data_stripes(map);
 	}
@@ -3228,12 +3232,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	if (!path)
 		return -ENOMEM;
 
-	ppath = btrfs_alloc_path();
-	if (!ppath) {
-		btrfs_free_path(path);
-		return -ENOMEM;
-	}
-
 	/*
 	 * work on commit root. The related disk blocks are static as
	 * long as COW is applied. This means, it is save to rewrite
@@ -3241,20 +3239,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	 */
 	path->search_commit_root = 1;
 	path->skip_locking = 1;
+	path->reada = READA_FORWARD;
 
-	ppath->search_commit_root = 1;
-	ppath->skip_locking = 1;
-	/*
-	 * trigger the readahead for extent tree csum tree and wait for
-	 * completion. During readahead, the scrub is officially paused
-	 * to not hold off transaction commits
-	 */
-	logical = base + offset;
+	logical = chunk_logical + offset;
 	physical_end = physical + nstripes * map->stripe_len;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-		get_raid56_logic_offset(physical_end, num,
+		get_raid56_logic_offset(physical_end, stripe_index,
 					map, &logic_end, NULL);
-		logic_end += base;
+		logic_end += chunk_logical;
 	} else {
 		logic_end = logical + increment * nstripes;
 	}
@@ -3262,32 +3254,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 		   atomic_read(&sctx->bios_in_flight) == 0);
 	scrub_blocked_if_needed(fs_info);
 
-	/* FIXME it might be better to start readahead at commit root */
-	key.objectid = logical;
-	key.type = BTRFS_EXTENT_ITEM_KEY;
-	key.offset = (u64)0;
-	key_end.objectid = logic_end;
-	key_end.type = BTRFS_METADATA_ITEM_KEY;
-	key_end.offset = (u64)-1;
-	reada1 = btrfs_reada_add(root, &key, &key_end);
-
-	if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
-		key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-		key.type = BTRFS_EXTENT_CSUM_KEY;
-		key.offset = logical;
-		key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-		key_end.type = BTRFS_EXTENT_CSUM_KEY;
-		key_end.offset = logic_end;
-		reada2 = btrfs_reada_add(csum_root, &key, &key_end);
-	} else {
-		reada2 = NULL;
-	}
-
-	if (!IS_ERR(reada1))
-		btrfs_reada_wait(reada1);
-	if (!IS_ERR_OR_NULL(reada2))
-		btrfs_reada_wait(reada2);
-
+	root = btrfs_extent_root(fs_info, logical);
+	csum_root = btrfs_csum_root(fs_info, logical);
 	/*
 	 * collect all data csums for the stripe to avoid seeking during
@@ -3333,16 +3301,16 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 		}
 		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-			ret = get_raid56_logic_offset(physical, num, map,
-						      &logical,
+			ret = get_raid56_logic_offset(physical, stripe_index,
+						      map, &logical,
 						      &stripe_logical);
-			logical += base;
+			logical += chunk_logical;
 			if (ret) {
 				/* it is parity strip */
-				stripe_logical += base;
+				stripe_logical += chunk_logical;
 				stripe_end = stripe_logical + increment;
 				ret = scrub_raid56_parity(sctx, map, scrub_dev,
-							  ppath, stripe_logical,
+							  stripe_logical,
 							  stripe_end);
 				if (ret)
 					goto out;
@@ -3419,13 +3387,13 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			 * Continuing would prevent reusing its device extents
 			 * for new block groups for a long time.
 			 */
-			spin_lock(&cache->lock);
-			if (cache->removed) {
-				spin_unlock(&cache->lock);
+			spin_lock(&bg->lock);
+			if (bg->removed) {
+				spin_unlock(&bg->lock);
 				ret = 0;
 				goto out;
 			}
-			spin_unlock(&cache->lock);
+			spin_unlock(&bg->lock);
 
 			extent = btrfs_item_ptr(l, slot,
 						struct btrfs_extent_item);
@@ -3504,16 +3472,16 @@ again:
 loop:
 					physical += map->stripe_len;
 					ret = get_raid56_logic_offset(physical,
-							num, map, &logical,
-							&stripe_logical);
-					logical += base;
+							stripe_index, map,
+							&logical, &stripe_logical);
+					logical += chunk_logical;
 					if (ret && physical < physical_end) {
-						stripe_logical += base;
+						stripe_logical += chunk_logical;
 						stripe_end = stripe_logical +
 								increment;
 						ret = scrub_raid56_parity(sctx,
-							map, scrub_dev, ppath,
+							map, scrub_dev,
 							stripe_logical,
 							stripe_end);
 						if (ret)
@@ -3543,8 +3511,8 @@ skip:
 		physical += map->stripe_len;
 		spin_lock(&sctx->stat_lock);
 		if (stop_loop)
-			sctx->stat.last_physical = map->stripes[num].physical +
-						   length;
+			sctx->stat.last_physical = map->stripes[stripe_index].physical +
+						   dev_extent_len;
 		else
 			sctx->stat.last_physical = physical;
 		spin_unlock(&sctx->stat_lock);
@@ -3560,14 +3528,14 @@ out:
 	blk_finish_plug(&plug);
 	btrfs_free_path(path);
-	btrfs_free_path(ppath);
 
 	if (sctx->is_dev_replace && ret >= 0) {
 		int ret2;
 
-		ret2 = sync_write_pointer_for_zoned(sctx, base + offset,
-						    map->stripes[num].physical,
-						    physical_end);
+		ret2 = sync_write_pointer_for_zoned(sctx,
+				chunk_logical + offset,
+				map->stripes[stripe_index].physical,
+				physical_end);
 		if (ret2)
 			ret = ret2;
 	}
@@ -3576,10 +3544,10 @@ out:
 	return ret < 0 ? ret : 0;
 }
 
 static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
+					  struct btrfs_block_group *bg,
 					  struct btrfs_device *scrub_dev,
-					  u64 chunk_offset, u64 length,
 					  u64 dev_offset,
-					  struct btrfs_block_group *cache)
+					  u64 dev_extent_len)
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
@@ -3589,7 +3557,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
 	int ret = 0;
 
 	read_lock(&map_tree->lock);
-	em = lookup_extent_mapping(map_tree, chunk_offset, 1);
+	em = lookup_extent_mapping(map_tree, bg->start, bg->length);
 	read_unlock(&map_tree->lock);
 
 	if (!em) {
@@ -3597,26 +3565,24 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
 		 * Might have been an unused block group deleted by the cleaner
 		 * kthread or relocation.
 		 */
-		spin_lock(&cache->lock);
-		if (!cache->removed)
+		spin_lock(&bg->lock);
+		if (!bg->removed)
 			ret = -EINVAL;
-		spin_unlock(&cache->lock);
+		spin_unlock(&bg->lock);
 
 		return ret;
 	}
-
-	map = em->map_lookup;
-	if (em->start != chunk_offset)
+	if (em->start != bg->start)
 		goto out;
-
-	if (em->len < length)
+	if (em->len < dev_extent_len)
 		goto out;
 
+	map = em->map_lookup;
 	for (i = 0; i < map->num_stripes; ++i) {
 		if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
 		    map->stripes[i].physical == dev_offset) {
-			ret = scrub_stripe(sctx, map, scrub_dev, i,
-					   chunk_offset, length, cache);
+			ret = scrub_stripe(sctx, bg, map, scrub_dev, i,
+					   dev_extent_len);
 			if (ret)
 				goto out;
 		}
@@ -3654,7 +3620,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 	struct btrfs_path *path;
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	struct btrfs_root *root = fs_info->dev_root;
-	u64 length;
 	u64 chunk_offset;
 	int ret = 0;
 	int ro_set;
@@ -3678,6 +3643,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 	key.type = BTRFS_DEV_EXTENT_KEY;
 
 	while (1) {
+		u64 dev_extent_len;
+
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		if (ret < 0)
 			break;
@@ -3714,9 +3681,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 			break;
 
 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
-		length = btrfs_dev_extent_length(l, dev_extent);
+		dev_extent_len = btrfs_dev_extent_length(l, dev_extent);
 
-		if (found_key.offset + length <= start)
+		if (found_key.offset + dev_extent_len <= start)
 			goto skip;
 
 		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
@@ -3850,13 +3817,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		scrub_pause_off(fs_info);
 
 		down_write(&dev_replace->rwsem);
-		dev_replace->cursor_right = found_key.offset + length;
+		dev_replace->cursor_right = found_key.offset + dev_extent_len;
 		dev_replace->cursor_left = found_key.offset;
 		dev_replace->item_needs_writeback = 1;
 		up_write(&dev_replace->rwsem);
 
-		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
-				  found_key.offset, cache);
+		ASSERT(cache->start == chunk_offset);
+		ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
+				  dev_extent_len);
 
 		/*
 		 * flush, submit all pending read and write bios, afterwards
@@ -3937,7 +3905,7 @@ skip_unfreeze:
 			break;
 		}
 skip:
-		key.offset = found_key.offset + length;
+		key.offset = found_key.offset + dev_extent_len;
 		btrfs_release_path(path);
 	}
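
The diff replaces the separate read/write per-bio page limits with a single SCRUB_PAGES_PER_BIO and derives SCRUB_MAX_PAGES_PER_BLOCK from BTRFS_MAX_METADATA_BLOCKSIZE instead of hard-coding 16. As a quick sanity check of that arithmetic, here is a standalone userspace sketch, not kernel code and not part of the patch: the SZ_4K and BTRFS_MAX_METADATA_BLOCKSIZE values are redefined locally, and a 4KiB page size is assumed, matching the "for x86" wording in the patch comments.

/*
 * Illustrative sketch only: verify that the new derived limits match the
 * old hard-coded values on a system with 4KiB pages. Kernel constants are
 * redefined here so this builds in userspace.
 */
#include <assert.h>
#include <stdio.h>

#define SZ_4K				(4UL * 1024)
#define BTRFS_MAX_METADATA_BLOCKSIZE	(64UL * 1024)	/* largest nodesize */

#define SCRUB_PAGES_PER_BIO		32	/* 128KiB per bio for x86 */
#define SCRUB_BIOS_PER_SCTX		64	/* 8MiB per device in flight for x86 */
#define SCRUB_MAX_PAGES_PER_BLOCK	(BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)

int main(void)
{
	const unsigned long page_size = SZ_4K;	/* assumption: 4KiB pages */

	/* The derived value equals the old hard-coded 16. */
	assert(SCRUB_MAX_PAGES_PER_BLOCK == 16);

	printf("per-bio payload:       %lu KiB\n",
	       SCRUB_PAGES_PER_BIO * page_size / 1024);
	printf("in-flight per device:  %lu MiB\n",
	       SCRUB_PAGES_PER_BIO * SCRUB_BIOS_PER_SCTX * page_size >> 20);
	printf("max pages per block:   %lu\n",
	       (unsigned long)SCRUB_MAX_PAGES_PER_BLOCK);
	return 0;
}

Deriving the block limit from BTRFS_MAX_METADATA_BLOCKSIZE documents where the bound comes from, so the value no longer has to be kept in sync with the largest supported nodesize by hand.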