Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--  fs/btrfs/ordered-data.c  |  364
1 file changed, 212 insertions(+), 152 deletions(-)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a9778a91511e..a629532283bc 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -146,35 +146,11 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 	return ret;
 }
 
-/*
- * Add an ordered extent to the per-inode tree.
- *
- * @inode:           Inode that this extent is for.
- * @file_offset:     Logical offset in file where the extent starts.
- * @num_bytes:       Logical length of extent in file.
- * @ram_bytes:       Full length of unencoded data.
- * @disk_bytenr:     Offset of extent on disk.
- * @disk_num_bytes:  Size of extent on disk.
- * @offset:          Offset into unencoded data where file data starts.
- * @flags:           Flags specifying type of extent (1 << BTRFS_ORDERED_*).
- * @compress_type:   Compression algorithm used for data.
- *
- * Most of these parameters correspond to &struct btrfs_file_extent_item. The
- * tree is given a single reference on the ordered extent that was inserted, and
- * the returned pointer is given a second reference.
- *
- * Return: the new ordered extent or error pointer.
- */
-struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
-			struct btrfs_inode *inode, u64 file_offset,
-			u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
-			u64 disk_num_bytes, u64 offset, unsigned long flags,
-			int compress_type)
+static struct btrfs_ordered_extent *alloc_ordered_extent(
+			struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
+			u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
+			u64 offset, unsigned long flags, int compress_type)
 {
-	struct btrfs_root *root = inode->root;
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
-	struct rb_node *node;
 	struct btrfs_ordered_extent *entry;
 	int ret;
 
@@ -184,7 +160,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
 		ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
 		if (ret < 0)
 			return ERR_PTR(ret);
-		ret = 0;
 	} else {
 		/*
 		 * The ordered extent has reserved qgroup space, release now
@@ -209,15 +184,7 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
 	entry->compress_type = compress_type;
 	entry->truncated_len = (u64)-1;
 	entry->qgroup_rsv = ret;
-	entry->physical = (u64)-1;
-
-	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
 	entry->flags = flags;
-
-	percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
-				 fs_info->delalloc_batch);
-
-	/* one ref for the tree */
 	refcount_set(&entry->refs, 1);
 	init_waitqueue_head(&entry->wait);
 	INIT_LIST_HEAD(&entry->list);
@@ -226,15 +193,40 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
 	INIT_LIST_HEAD(&entry->work_list);
 	init_completion(&entry->completion);
 
+	/*
+	 * We don't need the count_max_extents here, we can assume that all of
+	 * that work has been done at higher layers, so this is truly the
+	 * smallest the extent is going to get.
+	 */
+	spin_lock(&inode->lock);
+	btrfs_mod_outstanding_extents(inode, 1);
+	spin_unlock(&inode->lock);
+
+	return entry;
+}
+
+static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
+{
+	struct btrfs_inode *inode = BTRFS_I(entry->inode);
+	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+	struct btrfs_root *root = inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct rb_node *node;
+
 	trace_btrfs_ordered_extent_add(inode, entry);
 
+	percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
+				 fs_info->delalloc_batch);
+
+	/* One ref for the tree. */
+	refcount_inc(&entry->refs);
+
 	spin_lock_irq(&tree->lock);
-	node = tree_insert(&tree->tree, file_offset,
-			   &entry->rb_node);
+	node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node);
 	if (node)
 		btrfs_panic(fs_info, -EEXIST,
 				"inconsistency in ordered tree at offset %llu",
-				file_offset);
+				entry->file_offset);
 	spin_unlock_irq(&tree->lock);
 
 	spin_lock(&root->ordered_extent_lock);
@@ -248,43 +240,43 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
 		spin_unlock(&fs_info->ordered_root_lock);
 	}
 	spin_unlock(&root->ordered_extent_lock);
-
-	/*
-	 * We don't need the count_max_extents here, we can assume that all of
-	 * that work has been done at higher layers, so this is truly the
-	 * smallest the extent is going to get.
-	 */
-	spin_lock(&inode->lock);
-	btrfs_mod_outstanding_extents(inode, 1);
-	spin_unlock(&inode->lock);
-
-	/* One ref for the returned entry to match semantics of lookup. */
-	refcount_inc(&entry->refs);
-
-	return entry;
 }
 
 /*
- * Add a new btrfs_ordered_extent for the range, but drop the reference instead
- * of returning it to the caller.
+ * Add an ordered extent to the per-inode tree.
+ *
+ * @inode:           Inode that this extent is for.
+ * @file_offset:     Logical offset in file where the extent starts.
+ * @num_bytes:       Logical length of extent in file.
+ * @ram_bytes:       Full length of unencoded data.
+ * @disk_bytenr:     Offset of extent on disk.
+ * @disk_num_bytes:  Size of extent on disk.
+ * @offset:          Offset into unencoded data where file data starts.
+ * @flags:           Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @compress_type:   Compression algorithm used for data.
+ *
+ * Most of these parameters correspond to &struct btrfs_file_extent_item. The
+ * tree is given a single reference on the ordered extent that was inserted, and
+ * the returned pointer is given a second reference.
+ *
+ * Return: the new ordered extent or error pointer.
  */
-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-			     u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
-			     u64 disk_num_bytes, u64 offset, unsigned long flags,
-			     int compress_type)
+struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
+			struct btrfs_inode *inode, u64 file_offset,
+			u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+			u64 disk_num_bytes, u64 offset, unsigned long flags,
+			int compress_type)
 {
-	struct btrfs_ordered_extent *ordered;
-
-	ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes,
-					     ram_bytes, disk_bytenr,
-					     disk_num_bytes, offset, flags,
-					     compress_type);
+	struct btrfs_ordered_extent *entry;
 
-	if (IS_ERR(ordered))
-		return PTR_ERR(ordered);
-	btrfs_put_ordered_extent(ordered);
+	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
 
-	return 0;
+	entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
+				     disk_bytenr, disk_num_bytes, offset, flags,
+				     compress_type);
+	if (!IS_ERR(entry))
+		insert_ordered_extent(entry);
+	return entry;
 }
 
 /*
@@ -311,6 +303,90 @@ static void finish_ordered_fn(struct btrfs_work *work)
 	btrfs_finish_ordered_io(ordered_extent);
 }
 
+static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
+				      struct page *page, u64 file_offset,
+				      u64 len, bool uptodate)
+{
+	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+	lockdep_assert_held(&inode->ordered_tree.lock);
+
+	if (page) {
+		ASSERT(page->mapping);
+		ASSERT(page_offset(page) <= file_offset);
+		ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);
+
+		/*
+		 * Ordered (Private2) bit indicates whether we still have
+		 * pending io unfinished for the ordered extent.
+		 *
+		 * If there's no such bit, we need to skip to next range.
+		 */
+		if (!btrfs_page_test_ordered(fs_info, page, file_offset, len))
+			return false;
+		btrfs_page_clear_ordered(fs_info, page, file_offset, len);
+	}
+
+	/* Now we're fine to update the accounting. */
+	if (WARN_ON_ONCE(len > ordered->bytes_left)) {
+		btrfs_crit(fs_info,
+"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
+			   inode->root->root_key.objectid, btrfs_ino(inode),
+			   ordered->file_offset, ordered->num_bytes,
+			   len, ordered->bytes_left);
+		ordered->bytes_left = 0;
+	} else {
+		ordered->bytes_left -= len;
+	}
+
+	if (!uptodate)
+		set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+
+	if (ordered->bytes_left)
+		return false;
+
+	/*
+	 * All the IO of the ordered extent is finished, we need to queue
+	 * the finish_func to be executed.
+	 */
+	set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
+	cond_wake_up(&ordered->wait);
+	refcount_inc(&ordered->refs);
+	trace_btrfs_ordered_extent_mark_finished(inode, ordered);
+	return true;
+}
+
+static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
+{
+	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
+		fs_info->endio_freespace_worker : fs_info->endio_write_workers;
+
+	btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
+	btrfs_queue_work(wq, &ordered->work);
+}
+
+bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
+				 struct page *page, u64 file_offset, u64 len,
+				 bool uptodate)
+{
+	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+	unsigned long flags;
+	bool ret;
+
+	trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
+
+	spin_lock_irqsave(&inode->ordered_tree.lock, flags);
+	ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
+	spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);
+
+	if (ret)
+		btrfs_queue_ordered_fn(ordered);
+	return ret;
+}
+
 /*
  * Mark all ordered extents io inside the specified range finished.
  *
@@ -329,22 +405,11 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
 				    u64 num_bytes, bool uptodate)
 {
 	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct btrfs_workqueue *wq;
 	struct rb_node *node;
 	struct btrfs_ordered_extent *entry = NULL;
 	unsigned long flags;
 	u64 cur = file_offset;
 
-	if (btrfs_is_free_space_inode(inode))
-		wq = fs_info->endio_freespace_worker;
-	else
-		wq = fs_info->endio_write_workers;
-
-	if (page)
-		ASSERT(page->mapping && page_offset(page) <= file_offset &&
-		       file_offset + num_bytes <= page_offset(page) + PAGE_SIZE);
-
 	spin_lock_irqsave(&tree->lock, flags);
 	while (cur < file_offset + num_bytes) {
 		u64 entry_end;
@@ -397,50 +462,9 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
 		ASSERT(end + 1 - cur < U32_MAX);
 		len = end + 1 - cur;
 
-		if (page) {
-			/*
-			 * Ordered (Private2) bit indicates whether we still
-			 * have pending io unfinished for the ordered extent.
-			 *
-			 * If there's no such bit, we need to skip to next range.
-			 */
-			if (!btrfs_page_test_ordered(fs_info, page, cur, len)) {
-				cur += len;
-				continue;
-			}
-			btrfs_page_clear_ordered(fs_info, page, cur, len);
-		}
-
-		/* Now we're fine to update the accounting */
-		if (unlikely(len > entry->bytes_left)) {
-			WARN_ON(1);
-			btrfs_crit(fs_info,
-"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu",
-				   inode->root->root_key.objectid,
-				   btrfs_ino(inode),
-				   entry->file_offset,
-				   entry->num_bytes,
-				   len, entry->bytes_left);
-			entry->bytes_left = 0;
-		} else {
-			entry->bytes_left -= len;
-		}
-
-		if (!uptodate)
-			set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
-
-		/*
-		 * All the IO of the ordered extent is finished, we need to queue
-		 * the finish_func to be executed.
-		 */
-		if (entry->bytes_left == 0) {
-			set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
-			cond_wake_up(&entry->wait);
-			refcount_inc(&entry->refs);
-			trace_btrfs_ordered_extent_mark_finished(inode, entry);
+		if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
 			spin_unlock_irqrestore(&tree->lock, flags);
-			btrfs_init_work(&entry->work, finish_ordered_fn, NULL, NULL);
-			btrfs_queue_work(wq, &entry->work);
+			btrfs_queue_ordered_fn(entry);
 			spin_lock_irqsave(&tree->lock, flags);
 		}
 		cur += len;
@@ -564,7 +588,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 	freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
 
 	btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
-	/* This is paired with btrfs_add_ordered_extent. */
+	/* This is paired with btrfs_alloc_ordered_extent. */
 	spin_lock(&btrfs_inode->lock);
 	btrfs_mod_outstanding_extents(btrfs_inode, -1);
 	spin_unlock(&btrfs_inode->lock);
@@ -1117,17 +1141,22 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
 }
 
 /* Split out a new ordered extent for this first @len bytes of @ordered. */
-int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
+struct btrfs_ordered_extent *btrfs_split_ordered_extent(
+			struct btrfs_ordered_extent *ordered, u64 len)
 {
-	struct inode *inode = ordered->inode;
-	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+	struct btrfs_root *root = inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 file_offset = ordered->file_offset;
 	u64 disk_bytenr = ordered->disk_bytenr;
-	unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
+	unsigned long flags = ordered->flags;
+	struct btrfs_ordered_sum *sum, *tmpsum;
+	struct btrfs_ordered_extent *new;
 	struct rb_node *node;
+	u64 offset = 0;
 
-	trace_btrfs_ordered_extent_split(BTRFS_I(inode), ordered);
+	trace_btrfs_ordered_extent_split(inode, ordered);
 
 	ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));
 
@@ -1136,18 +1165,27 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
 	 * reduce the original extent to a zero length either.
 	 */
 	if (WARN_ON_ONCE(len >= ordered->num_bytes))
-		return -EINVAL;
-	/* We cannot split once ordered extent is past end_bio. */
-	if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
+	/* We cannot split partially completed ordered extents. */
+	if (ordered->bytes_left) {
+		ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
+		if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
+			return ERR_PTR(-EINVAL);
+	}
 	/* We cannot split a compressed ordered extent. */
 	if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
-		return -EINVAL;
-	/* Checksum list should be empty. */
-	if (WARN_ON_ONCE(!list_empty(&ordered->list)))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
-	spin_lock_irq(&tree->lock);
+	new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
+				   len, 0, flags, ordered->compress_type);
+	if (IS_ERR(new))
+		return new;
+
+	/* One ref for the tree. */
+	refcount_inc(&new->refs);
+
+	spin_lock_irq(&root->ordered_extent_lock);
+	spin_lock(&tree->lock);
 	/* Remove from tree once */
 	node = &ordered->rb_node;
 	rb_erase(node, &tree->tree);
@@ -1159,26 +1197,48 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
 	ordered->disk_bytenr += len;
 	ordered->num_bytes -= len;
 	ordered->disk_num_bytes -= len;
-	ordered->bytes_left -= len;
+
+	if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
+		ASSERT(ordered->bytes_left == 0);
+		new->bytes_left = 0;
+	} else {
+		ordered->bytes_left -= len;
+	}
+
+	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
+		if (ordered->truncated_len > len) {
+			ordered->truncated_len -= len;
+		} else {
+			new->truncated_len = ordered->truncated_len;
+			ordered->truncated_len = 0;
+		}
+	}
+
+	list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
+		if (offset == len)
+			break;
+		list_move_tail(&sum->list, &new->list);
+		offset += sum->len;
+	}
 
 	/* Re-insert the node */
 	node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
 	if (node)
 		btrfs_panic(fs_info, -EEXIST,
 			"zoned: inconsistency in ordered tree at offset %llu",
-			    ordered->file_offset);
+			ordered->file_offset);
 
-	spin_unlock_irq(&tree->lock);
-
-	/*
-	 * The splitting extent is already counted and will be added again in
-	 * btrfs_add_ordered_extent(). Subtract len to avoid double counting.
-	 */
-	percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch);
+	node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
+	if (node)
+		btrfs_panic(fs_info, -EEXIST,
+			"zoned: inconsistency in ordered tree at offset %llu",
+			new->file_offset);
+	spin_unlock(&tree->lock);
 
-	return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
-					disk_bytenr, len, 0, flags,
-					ordered->compress_type);
+	list_add_tail(&new->root_extent_list, &root->ordered_extents);
+	root->nr_ordered_extents++;
+	spin_unlock_irq(&root->ordered_extent_lock);
+	return new;
 }
 
 int __init ordered_data_init(void)
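
The snippet below is not part of the patch; it is a hypothetical caller sketch (the name example_add_ordered_extent is invented) modeled on the btrfs_add_ordered_extent() helper that this change removes. It only illustrates the reference semantics spelled out in the kernel-doc moved above btrfs_alloc_ordered_extent(): the per-inode tree keeps one reference on the inserted ordered extent and the returned pointer carries a second one, so a caller that does not keep the pointer drops it with btrfs_put_ordered_extent(). For simplicity it assumes a plain, uncompressed, unencoded extent, i.e. ram_bytes and disk_num_bytes equal num_bytes and the unencoded offset is 0.

static int example_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
				      u64 num_bytes, u64 disk_bytenr,
				      unsigned long flags, int compress_type)
{
	struct btrfs_ordered_extent *ordered;

	/* Allocate and insert into the per-inode tree; the tree holds one ref. */
	ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes,
					     num_bytes, disk_bytenr, num_bytes,
					     0, flags, compress_type);
	if (IS_ERR(ordered))
		return PTR_ERR(ordered);

	/* Drop the caller's reference; the tree still owns its own. */
	btrfs_put_ordered_extent(ordered);
	return 0;
}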