Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r--	fs/btrfs/ioctl.c	97
1 file changed, 79 insertions(+), 18 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a5bd6926f7ff..33eda39df685 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -805,10 +805,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 		goto fail;
 	}
 
-	spin_lock(&fs_info->trans_lock);
-	list_add(&pending_snapshot->list,
-		 &trans->transaction->pending_snapshots);
-	spin_unlock(&fs_info->trans_lock);
+	trans->pending_snapshot = pending_snapshot;
 
 	ret = btrfs_commit_transaction(trans);
 	if (ret)
@@ -1214,6 +1211,35 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 			goto next;
 
 		/*
+		 * Our start offset might be in the middle of an existing extent
+		 * map, so take that into account.
+		 */
+		range_len = em->len - (cur - em->start);
+		/*
+		 * If this range of the extent map is already flagged for delalloc,
+		 * skip it, because:
+		 *
+		 * 1) We could deadlock later, when trying to reserve space for
+		 *    delalloc, because in case we can't immediately reserve space
+		 *    the flusher can start delalloc and wait for the respective
+		 *    ordered extents to complete. The deadlock would happen
+		 *    because we do the space reservation while holding the range
+		 *    locked, and starting writeback, or finishing an ordered
+		 *    extent, requires locking the range;
+		 *
+		 * 2) If there's delalloc there, it means there's dirty pages for
+		 *    which writeback has not started yet (we clean the delalloc
+		 *    flag when starting writeback and after creating an ordered
+		 *    extent). If we mark pages in an adjacent range for defrag,
+		 *    then we will have a larger contiguous range for delalloc,
+		 *    very likely resulting in a larger extent after writeback is
+		 *    triggered (except in a case of free space fragmentation).
+		 */
+		if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1,
+				   EXTENT_DELALLOC, 0, NULL))
+			goto next;
+
+		/*
 		 * For do_compress case, we want to compress all valid file
 		 * extents, thus no @extent_thresh or mergeable check.
 		 */
@@ -1221,7 +1247,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 			goto add;
 
 		/* Skip too large extent */
-		if (em->len >= extent_thresh)
+		if (range_len >= extent_thresh)
 			goto next;
 
 		next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
@@ -1442,9 +1468,11 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
 	list_for_each_entry(entry, &target_list, list) {
 		u32 range_len = entry->len;
 
-		/* Reached the limit */
-		if (max_sectors && max_sectors == *sectors_defragged)
+		/* Reached or beyond the limit */
+		if (max_sectors && *sectors_defragged >= max_sectors) {
+			ret = 1;
 			break;
+		}
 
 		if (max_sectors)
 			range_len = min_t(u32, range_len,
@@ -1465,7 +1493,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
 				       extent_thresh, newer_than, do_compress);
 		if (ret < 0)
 			break;
-		*sectors_defragged += range_len;
+		*sectors_defragged += range_len >>
+				      inode->root->fs_info->sectorsize_bits;
 	}
 out:
 	list_for_each_entry_safe(entry, tmp, &target_list, list) {
@@ -1484,6 +1513,12 @@ out:
  * @newer_than:	   minimum transid to defrag
  * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
  *		   will be defragged.
+ *
+ * Return <0 for error.
+ * Return >=0 for the number of sectors defragged, and range->start will be updated
+ * to indicate the file offset where next defrag should be started at.
+ * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without
+ *  defragging all the range).
  */
 int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 		      struct btrfs_ioctl_defrag_range_args *range,
@@ -1499,6 +1534,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 	int compress_type = BTRFS_COMPRESS_ZLIB;
 	int ret = 0;
 	u32 extent_thresh = range->extent_thresh;
+	pgoff_t start_index;
 
 	if (isize == 0)
 		return 0;
@@ -1518,12 +1554,16 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 
 	if (range->start + range->len > range->start) {
 		/* Got a specific range */
-		last_byte = min(isize, range->start + range->len) - 1;
+		last_byte = min(isize, range->start + range->len);
 	} else {
 		/* Defrag until file end */
-		last_byte = isize - 1;
+		last_byte = isize;
 	}
 
+	/* Align the range */
+	cur = round_down(range->start, fs_info->sectorsize);
+	last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+
 	/*
 	 * If we were not given a ra, allocate a readahead context. As
 	 * readahead is just an optimization, defrag will work without it so
@@ -1536,16 +1576,26 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 			file_ra_state_init(ra, inode->i_mapping);
 	}
 
-	/* Align the range */
-	cur = round_down(range->start, fs_info->sectorsize);
-	last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+	/*
+	 * Make writeback start from the beginning of the range, so that the
+	 * defrag range can be written sequentially.
+	 */
+	start_index = cur >> PAGE_SHIFT;
+	if (start_index < inode->i_mapping->writeback_index)
+		inode->i_mapping->writeback_index = start_index;
 
 	while (cur < last_byte) {
+		const unsigned long prev_sectors_defragged = sectors_defragged;
 		u64 cluster_end;
 
 		/* The cluster size 256K should always be page aligned */
 		BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
 
+		if (btrfs_defrag_cancelled(fs_info)) {
+			ret = -EAGAIN;
+			break;
+		}
+
 		/* We want the cluster end at page boundary when possible */
 		cluster_end = (((cur >> PAGE_SHIFT) +
 			       (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;
@@ -1567,14 +1617,27 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 				cluster_end + 1 - cur, extent_thresh,
 				newer_than, do_compress,
 				&sectors_defragged, max_to_defrag);
+
+		if (sectors_defragged > prev_sectors_defragged)
+			balance_dirty_pages_ratelimited(inode->i_mapping);
+
 		btrfs_inode_unlock(inode, 0);
 		if (ret < 0)
 			break;
 		cur = cluster_end + 1;
+		if (ret > 0) {
+			ret = 0;
+			break;
+		}
 	}
 
 	if (ra_allocated)
 		kfree(ra);
+	/*
+	 * Update range.start for autodefrag, this will indicate where to start
+	 * in next run.
+	 */
+	range->start = cur;
 	if (sectors_defragged) {
 		/*
 		 * We have defragged some sectors, for compression case they
@@ -3086,10 +3149,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	btrfs_inode_lock(inode, 0);
 	err = btrfs_delete_subvolume(dir, dentry);
 	btrfs_inode_unlock(inode, 0);
-	if (!err) {
-		fsnotify_rmdir(dir, dentry);
-		d_delete(dentry);
-	}
+	if (!err)
+		d_delete_notify(dir, dentry);
 
 out_dput:
 	dput(dentry);
@@ -3290,7 +3351,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	struct block_device *bdev = NULL;
 	fmode_t mode;
 	int ret;
-	bool cancel;
+	bool cancel = false;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
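Editor's note: the offset arithmetic changed by the btrfs_defrag_file() hunks above is easy to misread, so here is a minimal userspace sketch (not kernel code) that re-derives it: the sectorsize alignment of the requested range, the 256K cluster end placed on a page boundary, and the byte-to-sector conversion now used for *sectors_defragged. The 4K sectorsize/page size and the round_down_u64()/round_up_u64() helpers are assumptions for illustration; the kernel's round_down()/round_up() macros and fs_info fields are not available in userspace.

#include <stdio.h>
#include <stdint.h>

/* Assumed values for illustration; the real ones come from fs_info. */
#define SECTORSIZE	4096ULL
#define SECTORSIZE_BITS	12
#define PAGE_SHIFT	12
#define SZ_256K		(256ULL * 1024)

/*
 * Userspace stand-ins for the kernel's round_down()/round_up(),
 * valid for power-of-two alignments only.
 */
static uint64_t round_down_u64(uint64_t x, uint64_t align)
{
	return x & ~(align - 1);
}

static uint64_t round_up_u64(uint64_t x, uint64_t align)
{
	return (x + align - 1) & ~(align - 1);
}

int main(void)
{
	uint64_t start = 5000;	/* unaligned range start */
	uint64_t len = 100000;	/* requested length; assume end is below i_size */

	/* Range alignment, as done before the defrag loop in the patch. */
	uint64_t cur = round_down_u64(start, SECTORSIZE);
	uint64_t last_byte = round_up_u64(start + len, SECTORSIZE) - 1;

	/* Cluster end on a page boundary, same expression as in the patch. */
	uint64_t cluster_end = (((cur >> PAGE_SHIFT) +
				 (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;

	/* Byte-to-sector conversion now applied to *sectors_defragged. */
	uint64_t sectors = (last_byte + 1 - cur) >> SECTORSIZE_BITS;

	printf("cur=%llu last_byte=%llu cluster_end=%llu sectors=%llu\n",
	       (unsigned long long)cur, (unsigned long long)last_byte,
	       (unsigned long long)cluster_end, (unsigned long long)sectors);
	return 0;
}

With these inputs the sketch prints cur=4096, last_byte=106495, cluster_end=266239, sectors=25, which illustrates why the old code, which added the byte length directly to *sectors_defragged, over-counted by a factor of the sector size.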