diff options
Diffstat (limited to 'fs/btrfs/ordered-data.c')
| -rw-r--r-- | fs/btrfs/ordered-data.c | 146 | 
1 files changed, 95 insertions, 51 deletions
| diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 35a413ce935d..82a68394a89c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -19,6 +19,7 @@  #include "qgroup.h"  #include "subpage.h"  #include "file.h" +#include "block-group.h"  static struct kmem_cache *btrfs_ordered_extent_cache; @@ -179,7 +180,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(  	entry->disk_num_bytes = disk_num_bytes;  	entry->offset = offset;  	entry->bytes_left = num_bytes; -	entry->inode = igrab(&inode->vfs_inode); +	entry->inode = BTRFS_I(igrab(&inode->vfs_inode));  	entry->compress_type = compress_type;  	entry->truncated_len = (u64)-1;  	entry->qgroup_rsv = qgroup_rsv; @@ -207,7 +208,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(  static void insert_ordered_extent(struct btrfs_ordered_extent *entry)  { -	struct btrfs_inode *inode = BTRFS_I(entry->inode); +	struct btrfs_inode *inode = entry->inode;  	struct btrfs_root *root = inode->root;  	struct btrfs_fs_info *fs_info = root->fs_info;  	struct rb_node *node; @@ -223,7 +224,7 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)  	spin_lock_irq(&inode->ordered_tree_lock);  	node = tree_insert(&inode->ordered_tree, entry->file_offset,  			   &entry->rb_node); -	if (node) +	if (unlikely(node))  		btrfs_panic(fs_info, -EEXIST,  				"inconsistency in ordered tree at offset %llu",  				entry->file_offset); @@ -263,17 +264,39 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)   */  struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(  			struct btrfs_inode *inode, u64 file_offset, -			u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, -			u64 disk_num_bytes, u64 offset, unsigned long flags, -			int compress_type) +			const struct btrfs_file_extent *file_extent, unsigned long flags)  {  	struct btrfs_ordered_extent *entry;  	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0); -	entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes, -				     disk_bytenr, disk_num_bytes, offset, flags, -				     compress_type); +	/* +	 * For regular writes, we just use the members in @file_extent. +	 * +	 * For NOCOW, we don't really care about the numbers except @start and +	 * file_extent->num_bytes, as we won't insert a file extent item at all. +	 * +	 * For PREALLOC, we do not use ordered extent members, but +	 * btrfs_mark_extent_written() handles everything. +	 * +	 * So here we always pass 0 as offset for NOCOW/PREALLOC ordered extents, +	 * or btrfs_split_ordered_extent() cannot handle it correctly. +	 */ +	if (flags & ((1U << BTRFS_ORDERED_NOCOW) | (1U << BTRFS_ORDERED_PREALLOC))) +		entry = alloc_ordered_extent(inode, file_offset, +					     file_extent->num_bytes, +					     file_extent->num_bytes, +					     file_extent->disk_bytenr + file_extent->offset, +					     file_extent->num_bytes, 0, flags, +					     file_extent->compression); +	else +		entry = alloc_ordered_extent(inode, file_offset, +					     file_extent->num_bytes, +					     file_extent->ram_bytes, +					     file_extent->disk_bytenr, +					     file_extent->disk_num_bytes, +					     file_extent->offset, flags, +					     file_extent->compression);  	if (!IS_ERR(entry))  		insert_ordered_extent(entry);  	return entry; @@ -287,7 +310,7 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(  void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,  			   struct btrfs_ordered_sum *sum)  { -	struct btrfs_inode *inode = BTRFS_I(entry->inode); +	struct btrfs_inode *inode = entry->inode;  	spin_lock_irq(&inode->ordered_tree_lock);  	list_add_tail(&sum->list, &entry->list); @@ -297,7 +320,7 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,  void btrfs_mark_ordered_extent_error(struct btrfs_ordered_extent *ordered)  {  	if (!test_and_set_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) -		mapping_set_error(ordered->inode->i_mapping, -EIO); +		mapping_set_error(ordered->inode->vfs_inode.i_mapping, -EIO);  }  static void finish_ordered_fn(struct btrfs_work *work) @@ -312,7 +335,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,  				      struct page *page, u64 file_offset,  				      u64 len, bool uptodate)  { -	struct btrfs_inode *inode = BTRFS_I(ordered->inode); +	struct btrfs_inode *inode = ordered->inode;  	struct btrfs_fs_info *fs_info = inode->root->fs_info;  	lockdep_assert_held(&inode->ordered_tree_lock); @@ -365,7 +388,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,  static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)  { -	struct btrfs_inode *inode = BTRFS_I(ordered->inode); +	struct btrfs_inode *inode = ordered->inode;  	struct btrfs_fs_info *fs_info = inode->root->fs_info;  	struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?  		fs_info->endio_freespace_worker : fs_info->endio_write_workers; @@ -374,11 +397,11 @@ static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)  	btrfs_queue_work(wq, &ordered->work);  } -bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, +void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,  				 struct page *page, u64 file_offset, u64 len,  				 bool uptodate)  { -	struct btrfs_inode *inode = BTRFS_I(ordered->inode); +	struct btrfs_inode *inode = ordered->inode;  	unsigned long flags;  	bool ret; @@ -421,7 +444,6 @@ bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,  	if (ret)  		btrfs_queue_ordered_fn(ordered); -	return ret;  }  /* @@ -588,14 +610,14 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)  	struct list_head *cur;  	struct btrfs_ordered_sum *sum; -	trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry); +	trace_btrfs_ordered_extent_put(entry->inode, entry);  	if (refcount_dec_and_test(&entry->refs)) {  		ASSERT(list_empty(&entry->root_extent_list));  		ASSERT(list_empty(&entry->log_list));  		ASSERT(RB_EMPTY_NODE(&entry->rb_node));  		if (entry->inode) -			btrfs_add_delayed_iput(BTRFS_I(entry->inode)); +			btrfs_add_delayed_iput(entry->inode);  		while (!list_empty(&entry->list)) {  			cur = entry->list.next;  			sum = list_entry(cur, struct btrfs_ordered_sum, list); @@ -626,7 +648,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,  	freespace_inode = btrfs_is_free_space_inode(btrfs_inode);  	btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered); -	/* This is paired with btrfs_alloc_ordered_extent. */ +	/* This is paired with alloc_ordered_extent(). */  	spin_lock(&btrfs_inode->lock);  	btrfs_mod_outstanding_extents(btrfs_inode, -1);  	spin_unlock(&btrfs_inode->lock); @@ -712,11 +734,11 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)  }  /* - * wait for all the ordered extents in a root.  This is done when balancing - * space between drives. + * Wait for all the ordered extents in a root. Use @bg as range or do whole + * range if it's NULL.   */  u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, -			       const u64 range_start, const u64 range_len) +			       const struct btrfs_block_group *bg)  {  	struct btrfs_fs_info *fs_info = root->fs_info;  	LIST_HEAD(splice); @@ -724,7 +746,17 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,  	LIST_HEAD(works);  	struct btrfs_ordered_extent *ordered, *next;  	u64 count = 0; -	const u64 range_end = range_start + range_len; +	u64 range_start, range_len; +	u64 range_end; + +	if (bg) { +		range_start = bg->start; +		range_len = bg->length; +	} else { +		range_start = 0; +		range_len = U64_MAX; +	} +	range_end = range_start + range_len;  	mutex_lock(&root->ordered_extent_mutex);  	spin_lock(&root->ordered_extent_lock); @@ -751,10 +783,10 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,  		btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);  		cond_resched(); -		spin_lock(&root->ordered_extent_lock);  		if (nr != U64_MAX)  			nr--;  		count++; +		spin_lock(&root->ordered_extent_lock);  	}  	list_splice_tail(&skipped, &root->ordered_extents);  	list_splice_tail(&splice, &root->ordered_extents); @@ -771,8 +803,12 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,  	return count;  } +/* + * Wait for @nr ordered extents that intersect the @bg, or the whole range of + * the filesystem if @bg is NULL. + */  void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, -			     const u64 range_start, const u64 range_len) +			      const struct btrfs_block_group *bg)  {  	struct btrfs_root *root;  	LIST_HEAD(splice); @@ -790,14 +826,13 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,  			       &fs_info->ordered_roots);  		spin_unlock(&fs_info->ordered_root_lock); -		done = btrfs_wait_ordered_extents(root, nr, -						  range_start, range_len); +		done = btrfs_wait_ordered_extents(root, nr, bg);  		btrfs_put_root(root); -		spin_lock(&fs_info->ordered_root_lock); -		if (nr != U64_MAX) { +		if (nr != U64_MAX)  			nr -= done; -		} + +		spin_lock(&fs_info->ordered_root_lock);  	}  	list_splice_tail(&splice, &fs_info->ordered_roots);  	spin_unlock(&fs_info->ordered_root_lock); @@ -814,7 +849,7 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)  {  	u64 start = entry->file_offset;  	u64 end = start + entry->num_bytes - 1; -	struct btrfs_inode *inode = BTRFS_I(entry->inode); +	struct btrfs_inode *inode = entry->inode;  	bool freespace_inode;  	trace_btrfs_ordered_extent_start(inode, entry); @@ -841,7 +876,7 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)  /*   * Used to wait on ordered extents across a large range of bytes.   */ -int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +int btrfs_wait_ordered_range(struct btrfs_inode *inode, u64 start, u64 len)  {  	int ret = 0;  	int ret_wb = 0; @@ -871,11 +906,11 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)  	 * before the ordered extents complete - to avoid failures (-EEXIST)  	 * when adding the new ordered extents to the ordered tree.  	 */ -	ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end); +	ret_wb = filemap_fdatawait_range(inode->vfs_inode.i_mapping, start, orig_end);  	end = orig_end;  	while (1) { -		ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end); +		ordered = btrfs_lookup_first_ordered_extent(inode, end);  		if (!ordered)  			break;  		if (ordered->file_offset > orig_end) { @@ -1173,7 +1208,7 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,  struct btrfs_ordered_extent *btrfs_split_ordered_extent(  			struct btrfs_ordered_extent *ordered, u64 len)  { -	struct btrfs_inode *inode = BTRFS_I(ordered->inode); +	struct btrfs_inode *inode = ordered->inode;  	struct btrfs_root *root = inode->root;  	struct btrfs_fs_info *fs_info = root->fs_info;  	u64 file_offset = ordered->file_offset; @@ -1212,15 +1247,32 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(  	/* One ref for the tree. */  	refcount_inc(&new->refs); +	/* +	 * Take the root's ordered_extent_lock to avoid a race with +	 * btrfs_wait_ordered_extents() when updating the disk_bytenr and +	 * disk_num_bytes fields of the ordered extent below. And we disable +	 * IRQs because the inode's ordered_tree_lock is used in IRQ context +	 * elsewhere. +	 * +	 * There's no concern about a previous caller of +	 * btrfs_wait_ordered_extents() getting the trimmed ordered extent +	 * before we insert the new one, because even if it gets the ordered +	 * extent before it's trimmed and the new one inserted, right before it +	 * uses it or during its use, the ordered extent might have been +	 * trimmed in the meanwhile, and it missed the new ordered extent. +	 * There's no way around this and it's harmless for current use cases, +	 * so we take the root's ordered_extent_lock to fix that race during +	 * trimming and silence tools like KCSAN. +	 */  	spin_lock_irq(&root->ordered_extent_lock);  	spin_lock(&inode->ordered_tree_lock); -	/* Remove from tree once */ -	node = &ordered->rb_node; -	rb_erase(node, &inode->ordered_tree); -	RB_CLEAR_NODE(node); -	if (inode->ordered_tree_last == node) -		inode->ordered_tree_last = NULL; +	/* +	 * We don't have overlapping ordered extents (that would imply double +	 * allocation of extents) and we checked above that the split length +	 * does not cross the ordered extent's num_bytes field, so there's +	 * no need to remove it and re-insert it in the tree. +	 */  	ordered->file_offset += len;  	ordered->disk_bytenr += len;  	ordered->num_bytes -= len; @@ -1250,18 +1302,10 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(  		offset += sum->len;  	} -	/* Re-insert the node */ -	node = tree_insert(&inode->ordered_tree, ordered->file_offset, -			   &ordered->rb_node); -	if (node) -		btrfs_panic(fs_info, -EEXIST, -			"zoned: inconsistency in ordered tree at offset %llu", -			ordered->file_offset); -  	node = tree_insert(&inode->ordered_tree, new->file_offset, &new->rb_node); -	if (node) +	if (unlikely(node))  		btrfs_panic(fs_info, -EEXIST, -			"zoned: inconsistency in ordered tree at offset %llu", +			"inconsistency in ordered tree at offset %llu after split",  			new->file_offset);  	spin_unlock(&inode->ordered_tree_lock); |