diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 308 | 
1 file changed, 278 insertions(+), 30 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813537f362f9..3efe1c3877bf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,  	caching_ctl->block_group = cache;  	caching_ctl->progress = cache->key.objectid;  	atomic_set(&caching_ctl->count, 1); -	btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); +	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, +			caching_thread, NULL, NULL);  	spin_lock(&cache->lock);  	/* @@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,  		async->sync = 0;  	init_completion(&async->wait); -	btrfs_init_work(&async->work, delayed_ref_async_start, -			NULL, NULL); +	btrfs_init_work(&async->work, btrfs_extent_refs_helper, +			delayed_ref_async_start, NULL, NULL);  	btrfs_queue_work(root->fs_info->extent_workers, &async->work); @@ -3057,7 +3058,7 @@ out:  static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,  			   struct btrfs_root *root,  			   struct extent_buffer *buf, -			   int full_backref, int inc, int no_quota) +			   int full_backref, int inc)  {  	u64 bytenr;  	u64 num_bytes; @@ -3111,7 +3112,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,  			key.offset -= btrfs_file_extent_offset(buf, fi);  			ret = process_func(trans, root, bytenr, num_bytes,  					   parent, ref_root, key.objectid, -					   key.offset, no_quota); +					   key.offset, 1);  			if (ret)  				goto fail;  		} else { @@ -3119,7 +3120,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,  			num_bytes = btrfs_level_size(root, level - 1);  			ret = process_func(trans, root, bytenr, num_bytes,  					   parent, ref_root, level - 1, 0, -					   no_quota); +					   1);  			if (ret)  				goto fail;  		} @@ -3130,15 +3131,15 @@ fail:  }  int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, -		  struct extent_buffer *buf, int full_backref, int 
no_quota) +		  struct extent_buffer *buf, int full_backref)  { -	return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); +	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);  }  int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, -		  struct extent_buffer *buf, int full_backref, int no_quota) +		  struct extent_buffer *buf, int full_backref)  { -	return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); +	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);  }  static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)   */  static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)  { -	/* -	 * we add in the count of missing devices because we want -	 * to make sure that any RAID levels on a degraded FS -	 * continue to be honored. -	 */ -	u64 num_devices = root->fs_info->fs_devices->rw_devices + -		root->fs_info->fs_devices->missing_devices; +	u64 num_devices = root->fs_info->fs_devices->rw_devices;  	u64 target;  	u64 tmp; @@ -7478,6 +7473,220 @@ reada:  	wc->reada_slot = slot;  } +static int account_leaf_items(struct btrfs_trans_handle *trans, +			      struct btrfs_root *root, +			      struct extent_buffer *eb) +{ +	int nr = btrfs_header_nritems(eb); +	int i, extent_type, ret; +	struct btrfs_key key; +	struct btrfs_file_extent_item *fi; +	u64 bytenr, num_bytes; + +	for (i = 0; i < nr; i++) { +		btrfs_item_key_to_cpu(eb, &key, i); + +		if (key.type != BTRFS_EXTENT_DATA_KEY) +			continue; + +		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); +		/* filter out non qgroup-accountable extents  */ +		extent_type = btrfs_file_extent_type(eb, fi); + +		if (extent_type == BTRFS_FILE_EXTENT_INLINE) +			continue; + +		bytenr = btrfs_file_extent_disk_bytenr(eb, fi); +		if (!bytenr) +			continue; + +		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + +		ret = 
btrfs_qgroup_record_ref(trans, root->fs_info, +					      root->objectid, +					      bytenr, num_bytes, +					      BTRFS_QGROUP_OPER_SUB_SUBTREE, 0); +		if (ret) +			return ret; +	} +	return 0; +} + +/* + * Walk up the tree from the bottom, freeing leaves and any interior + * nodes which have had all slots visited. If a node (leaf or + * interior) is freed, the node above it will have it's slot + * incremented. The root node will never be freed. + * + * At the end of this function, we should have a path which has all + * slots incremented to the next position for a search. If we need to + * read a new node it will be NULL and the node above it will have the + * correct slot selected for a later read. + * + * If we increment the root nodes slot counter past the number of + * elements, 1 is returned to signal completion of the search. + */ +static int adjust_slots_upwards(struct btrfs_root *root, +				struct btrfs_path *path, int root_level) +{ +	int level = 0; +	int nr, slot; +	struct extent_buffer *eb; + +	if (root_level == 0) +		return 1; + +	while (level <= root_level) { +		eb = path->nodes[level]; +		nr = btrfs_header_nritems(eb); +		path->slots[level]++; +		slot = path->slots[level]; +		if (slot >= nr || level == 0) { +			/* +			 * Don't free the root -  we will detect this +			 * condition after our loop and return a +			 * positive value for caller to stop walking the tree. +			 */ +			if (level != root_level) { +				btrfs_tree_unlock_rw(eb, path->locks[level]); +				path->locks[level] = 0; + +				free_extent_buffer(eb); +				path->nodes[level] = NULL; +				path->slots[level] = 0; +			} +		} else { +			/* +			 * We have a valid slot to walk back down +			 * from. Stop here so caller can process these +			 * new nodes. 
+			 */ +			break; +		} + +		level++; +	} + +	eb = path->nodes[root_level]; +	if (path->slots[root_level] >= btrfs_header_nritems(eb)) +		return 1; + +	return 0; +} + +/* + * root_eb is the subtree root and is locked before this function is called. + */ +static int account_shared_subtree(struct btrfs_trans_handle *trans, +				  struct btrfs_root *root, +				  struct extent_buffer *root_eb, +				  u64 root_gen, +				  int root_level) +{ +	int ret = 0; +	int level; +	struct extent_buffer *eb = root_eb; +	struct btrfs_path *path = NULL; + +	BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); +	BUG_ON(root_eb == NULL); + +	if (!root->fs_info->quota_enabled) +		return 0; + +	if (!extent_buffer_uptodate(root_eb)) { +		ret = btrfs_read_buffer(root_eb, root_gen); +		if (ret) +			goto out; +	} + +	if (root_level == 0) { +		ret = account_leaf_items(trans, root, root_eb); +		goto out; +	} + +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; + +	/* +	 * Walk down the tree.  Missing extent blocks are filled in as +	 * we go. Metadata is accounted every time we read a new +	 * extent block. +	 * +	 * When we reach a leaf, we account for file extent items in it, +	 * walk back up the tree (adjusting slot pointers as we go) +	 * and restart the search process. +	 */ +	extent_buffer_get(root_eb); /* For path */ +	path->nodes[root_level] = root_eb; +	path->slots[root_level] = 0; +	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ +walk_down: +	level = root_level; +	while (level >= 0) { +		if (path->nodes[level] == NULL) { +			int child_bsize = root->nodesize; +			int parent_slot; +			u64 child_gen; +			u64 child_bytenr; + +			/* We need to get child blockptr/gen from +			 * parent before we can read it. 
*/ +			eb = path->nodes[level + 1]; +			parent_slot = path->slots[level + 1]; +			child_bytenr = btrfs_node_blockptr(eb, parent_slot); +			child_gen = btrfs_node_ptr_generation(eb, parent_slot); + +			eb = read_tree_block(root, child_bytenr, child_bsize, +					     child_gen); +			if (!eb || !extent_buffer_uptodate(eb)) { +				ret = -EIO; +				goto out; +			} + +			path->nodes[level] = eb; +			path->slots[level] = 0; + +			btrfs_tree_read_lock(eb); +			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); +			path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + +			ret = btrfs_qgroup_record_ref(trans, root->fs_info, +						root->objectid, +						child_bytenr, +						child_bsize, +						BTRFS_QGROUP_OPER_SUB_SUBTREE, +						0); +			if (ret) +				goto out; + +		} + +		if (level == 0) { +			ret = account_leaf_items(trans, root, path->nodes[level]); +			if (ret) +				goto out; + +			/* Nonzero return here means we completed our search */ +			ret = adjust_slots_upwards(root, path, root_level); +			if (ret) +				break; + +			/* Restart search with new slots */ +			goto walk_down; +		} + +		level--; +	} + +	ret = 0; +out: +	btrfs_free_path(path); + +	return ret; +} +  /*   * helper to process tree block while walking down the tree.   
* @@ -7532,9 +7741,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,  	/* wc->stage == UPDATE_BACKREF */  	if (!(wc->flags[level] & flag)) {  		BUG_ON(!path->locks[level]); -		ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); +		ret = btrfs_inc_ref(trans, root, eb, 1);  		BUG_ON(ret); /* -ENOMEM */ -		ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); +		ret = btrfs_dec_ref(trans, root, eb, 0);  		BUG_ON(ret); /* -ENOMEM */  		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,  						  eb->len, flag, @@ -7581,6 +7790,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,  	int level = wc->level;  	int reada = 0;  	int ret = 0; +	bool need_account = false;  	generation = btrfs_node_ptr_generation(path->nodes[level],  					       path->slots[level]); @@ -7626,6 +7836,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,  	if (wc->stage == DROP_REFERENCE) {  		if (wc->refs[level - 1] > 1) { +			need_account = true;  			if (level == 1 &&  			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))  				goto skip; @@ -7689,6 +7900,16 @@ skip:  			parent = 0;  		} +		if (need_account) { +			ret = account_shared_subtree(trans, root, next, +						     generation, level - 1); +			if (ret) { +				printk_ratelimited(KERN_ERR "BTRFS: %s Error " +					"%d accounting shared subtree. 
Quota " +					"is out of sync, rescan required.\n", +					root->fs_info->sb->s_id, ret); +			} +		}  		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,  				root->root_key.objectid, level - 1, 0, 0);  		BUG_ON(ret); /* -ENOMEM */ @@ -7769,12 +7990,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,  	if (wc->refs[level] == 1) {  		if (level == 0) {  			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) -				ret = btrfs_dec_ref(trans, root, eb, 1, -						    wc->for_reloc); +				ret = btrfs_dec_ref(trans, root, eb, 1);  			else -				ret = btrfs_dec_ref(trans, root, eb, 0, -						    wc->for_reloc); +				ret = btrfs_dec_ref(trans, root, eb, 0);  			BUG_ON(ret); /* -ENOMEM */ +			ret = account_leaf_items(trans, root, eb); +			if (ret) { +				printk_ratelimited(KERN_ERR "BTRFS: %s Error " +					"%d accounting leaf items. Quota " +					"is out of sync, rescan required.\n", +					root->fs_info->sb->s_id, ret); +			}  		}  		/* make block locked assertion in clean_tree_block happy */  		if (!path->locks[level] && @@ -7900,6 +8126,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,  	int level;  	bool root_dropped = false; +	btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid); +  	path = btrfs_alloc_path();  	if (!path) {  		err = -ENOMEM; @@ -8025,6 +8253,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,  				goto out_end_trans;  			} +			/* +			 * Qgroup update accounting is run from +			 * delayed ref handling. This usually works +			 * out because delayed refs are normally the +			 * only way qgroup updates are added. However, +			 * we may have added updates during our tree +			 * walk so run qgroups here to make sure we +			 * don't lose any updates. +			 */ +			ret = btrfs_delayed_qgroup_accounting(trans, +							      root->fs_info); +			if (ret) +				printk_ratelimited(KERN_ERR "BTRFS: Failure %d " +						   "running qgroup updates " +						   "during snapshot delete. 
" +						   "Quota is out of sync, " +						   "rescan required.\n", ret); +  			btrfs_end_transaction_throttle(trans, tree_root);  			if (!for_reloc && btrfs_need_cleaner_sleep(root)) {  				pr_debug("BTRFS: drop snapshot early exit\n"); @@ -8078,6 +8324,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,  	}  	root_dropped = true;  out_end_trans: +	ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info); +	if (ret) +		printk_ratelimited(KERN_ERR "BTRFS: Failure %d " +				   "running qgroup updates " +				   "during snapshot delete. " +				   "Quota is out of sync, " +				   "rescan required.\n", ret); +  	btrfs_end_transaction_throttle(trans, tree_root);  out_free:  	kfree(wc); @@ -8181,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)  	if (stripped)  		return extended_to_chunk(stripped); -	/* -	 * we add in the count of missing devices because we want -	 * to make sure that any RAID levels on a degraded FS -	 * continue to be honored. -	 */ -	num_devices = root->fs_info->fs_devices->rw_devices + -		root->fs_info->fs_devices->missing_devices; +	num_devices = root->fs_info->fs_devices->rw_devices;  	stripped = BTRFS_BLOCK_GROUP_RAID0 |  		BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |  |