diff options
Diffstat (limited to 'fs/btrfs/ordered-data.c')
| -rw-r--r-- | fs/btrfs/ordered-data.c | 31 | 
1 files changed, 31 insertions, 0 deletions
| diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index c5bdd674f55c..35a413ce935d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -388,6 +388,37 @@ bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,  	ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);  	spin_unlock_irqrestore(&inode->ordered_tree_lock, flags); +	/* +	 * If this is a COW write it means we created new extent maps for the +	 * range and they point to unwritten locations if we got an error either +	 * before submitting a bio or during IO. +	 * +	 * We have marked the ordered extent with BTRFS_ORDERED_IOERR, and we +	 * are queuing its completion below. During completion, at +	 * btrfs_finish_one_ordered(), we will drop the extent maps for the +	 * unwritten extents. +	 * +	 * However because completion runs in a work queue we can end up having +	 * a fast fsync running before that. In the case of direct IO, once we +	 * unlock the inode the fsync might start, and we queue the completion +	 * before unlocking the inode. In the case of buffered IO when writeback +	 * finishes (end_bbio_data_write()) we queue the completion, so if the +	 * writeback was triggered by a fast fsync, the fsync might start +	 * logging before ordered extent completion runs in the work queue. +	 * +	 * The fast fsync will log file extent items based on the extent maps it +	 * finds, so if by the time it collects extent maps the ordered extent +	 * completion didn't happen yet, it will log file extent items that +	 * point to unwritten extents, resulting in a corruption if a crash +	 * happens and the log tree is replayed. Note that a fast fsync does not +	 * wait for completion of ordered extents in order to reduce latency. +	 * +	 * Set a flag in the inode so that the next fast fsync will wait for +	 * ordered extents to complete before starting to log. +	 */ +	if (!uptodate && !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) +		set_bit(BTRFS_INODE_COW_WRITE_ERROR, &inode->runtime_flags); +  	if (ret)  		btrfs_queue_ordered_fn(ordered);  	return ret; |