diff options
 -rw-r--r--  fs/iomap.c                     | 17
 -rw-r--r--  fs/xfs/kmem.c                  | 18
 -rw-r--r--  fs/xfs/kmem.h                  |  2
 -rw-r--r--  fs/xfs/libxfs/xfs_bmap.c       | 34
 -rw-r--r--  fs/xfs/libxfs/xfs_bmap_btree.c |  6
 -rw-r--r--  fs/xfs/xfs_aops.c              | 59
 -rw-r--r--  fs/xfs/xfs_icache.c            |  2
 -rw-r--r--  fs/xfs/xfs_inode.c             |  2
 -rw-r--r--  fs/xfs/xfs_iomap.c             | 25
 -rw-r--r--  fs/xfs/xfs_itable.c            |  6
 -rw-r--r--  fs/xfs/xfs_mount.c             |  3
 -rw-r--r--  fs/xfs/xfs_reflink.c           | 23
 -rw-r--r--  fs/xfs/xfs_reflink.h           |  4
 -rw-r--r--  fs/xfs/xfs_super.c             |  2
 14 files changed, 103 insertions, 100 deletions
| diff --git a/fs/iomap.c b/fs/iomap.c index 3ca1a8e44135..141c3cd55a8b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,  	struct address_space *mapping = iocb->ki_filp->f_mapping;  	struct inode *inode = file_inode(iocb->ki_filp);  	size_t count = iov_iter_count(iter); -	loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; +	loff_t pos = iocb->ki_pos, start = pos; +	loff_t end = iocb->ki_pos + count - 1, ret = 0;  	unsigned int flags = IOMAP_DIRECT;  	struct blk_plug plug;  	struct iomap_dio *dio; @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,  	}  	if (mapping->nrpages) { -		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); +		ret = filemap_write_and_wait_range(mapping, start, end);  		if (ret)  			goto out_free_dio;  		ret = invalidate_inode_pages2_range(mapping, -				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); +				start >> PAGE_SHIFT, end >> PAGE_SHIFT);  		WARN_ON_ONCE(ret);  		ret = 0;  	} @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,  		__set_current_state(TASK_RUNNING);  	} +	ret = iomap_dio_complete(dio); +  	/*  	 * Try again to invalidate clean pages which might have been cached by  	 * non-direct readahead, or faulted in by get_user_pages() if the source @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,  	 * this invalidation fails, tough, the write still worked...  	 
*/  	if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { -		ret = invalidate_inode_pages2_range(mapping, -				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); -		WARN_ON_ONCE(ret); +		int err = invalidate_inode_pages2_range(mapping, +				start >> PAGE_SHIFT, end >> PAGE_SHIFT); +		WARN_ON_ONCE(err);  	} -	return iomap_dio_complete(dio); +	return ret;  out_free_dio:  	kfree(dio); diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 2dfdc62f795e..70a5b55e0870 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -25,24 +25,6 @@  #include "kmem.h"  #include "xfs_message.h" -/* - * Greedy allocation.  May fail and may return vmalloced memory. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ -	void		*ptr; -	size_t		kmsize = maxsize; - -	while (!(ptr = vzalloc(kmsize))) { -		if ((kmsize >>= 1) <= minsize) -			kmsize = minsize; -	} -	if (ptr) -		*size = kmsize; -	return ptr; -} -  void *  kmem_alloc(size_t size, xfs_km_flags_t flags)  { diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7..f0fc84fcaac2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -69,8 +69,6 @@ static inline void  kmem_free(const void *ptr)  } -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); -  static inline void *  kmem_zalloc(size_t size, xfs_km_flags_t flags)  { diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a9c66d47757a..9bd104f32908 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree(  		args.type = XFS_ALLOCTYPE_START_BNO;  		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);  	} else if (dfops->dop_low) { -try_another_ag:  		args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag:  		args.fsbno = *firstblock;  	} else {  		args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -790,13 +790,17 @@ try_another_ag:  	if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&  	    args.fsbno == NULLFSBLOCK &&  	    args.type == XFS_ALLOCTYPE_NEAR_BNO) { -		dfops->dop_low = true; +		args.type = 
XFS_ALLOCTYPE_FIRST_AG;  		goto try_another_ag;  	} +	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { +		xfs_iroot_realloc(ip, -1, whichfork); +		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); +		return -ENOSPC; +	}  	/*  	 * Allocation can't fail, the space was reserved.  	 */ -	ASSERT(args.fsbno != NULLFSBLOCK);  	ASSERT(*firstblock == NULLFSBLOCK ||  	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));  	*firstblock = cur->bc_private.b.firstblock = args.fsbno; @@ -4150,6 +4154,19 @@ xfs_bmapi_read(  	return 0;  } +/* + * Add a delayed allocation extent to an inode. Blocks are reserved from the + * global pool and the extent inserted into the inode in-core extent tree. + * + * On entry, got refers to the first extent beyond the offset of the extent to + * allocate or eof is specified if no such extent exists. On return, got refers + * to the extent record that was inserted to the inode fork. + * + * Note that the allocated extent may have been merged with contiguous extents + * during insertion into the inode fork. Thus, got does not reflect the current + * state of the inode fork on return. If necessary, the caller can use lastx to + * look up the updated record in the inode fork. + */  int  xfs_bmapi_reserve_delalloc(  	struct xfs_inode	*ip, @@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc(  	got->br_startblock = nullstartblock(indlen);  	got->br_blockcount = alen;  	got->br_state = XFS_EXT_NORM; -	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); -	/* -	 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay -	 * might have merged it into one of the neighbouring ones. -	 */ -	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); +	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);  	/*  	 * Tag the inode if blocks were preallocated. 
Note that COW fork @@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc(  	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))  		xfs_inode_set_cowblocks_tag(ip); -	ASSERT(got->br_startoff <= aoff); -	ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); -	ASSERT(isnullstartblock(got->br_startblock)); -	ASSERT(got->br_state == XFS_EXT_NORM);  	return 0;  out_unreserve_blocks: diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index f93072b58a58..fd55db479385 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -447,8 +447,8 @@ xfs_bmbt_alloc_block(  	if (args.fsbno == NULLFSBLOCK) {  		args.fsbno = be64_to_cpu(start->l); -try_another_ag:  		args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag:  		/*  		 * Make sure there is sufficient room left in the AG to  		 * complete a full tree split for an extent insert.  If @@ -488,8 +488,8 @@ try_another_ag:  	if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&  	    args.fsbno == NULLFSBLOCK &&  	    args.type == XFS_ALLOCTYPE_NEAR_BNO) { -		cur->bc_private.b.dfops->dop_low = true;  		args.fsbno = cur->bc_private.b.firstblock; +		args.type = XFS_ALLOCTYPE_FIRST_AG;  		goto try_another_ag;  	} @@ -506,7 +506,7 @@ try_another_ag:  			goto error0;  		cur->bc_private.b.dfops->dop_low = true;  	} -	if (args.fsbno == NULLFSBLOCK) { +	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {  		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);  		*stat = 0;  		return 0; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index bf65a9ea8642..61494295d92f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -274,54 +274,49 @@ xfs_end_io(  	struct xfs_ioend	*ioend =  		container_of(work, struct xfs_ioend, io_work);  	struct xfs_inode	*ip = XFS_I(ioend->io_inode); +	xfs_off_t		offset = ioend->io_offset; +	size_t			size = ioend->io_size;  	int			error = ioend->io_bio->bi_error;  	/* -	 * Set an error if the mount has shut down and proceed with end I/O -	 * processing so 
it can perform whatever cleanups are necessary. +	 * Just clean up the in-memory strutures if the fs has been shut down.  	 */ -	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) +	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {  		error = -EIO; +		goto done; +	}  	/* -	 * For a CoW extent, we need to move the mapping from the CoW fork -	 * to the data fork.  If instead an error happened, just dump the -	 * new blocks. +	 * Clean up any COW blocks on an I/O error.  	 */ -	if (ioend->io_type == XFS_IO_COW) { -		if (error) -			goto done; -		if (ioend->io_bio->bi_error) { -			error = xfs_reflink_cancel_cow_range(ip, -					ioend->io_offset, ioend->io_size); -			goto done; +	if (unlikely(error)) { +		switch (ioend->io_type) { +		case XFS_IO_COW: +			xfs_reflink_cancel_cow_range(ip, offset, size, true); +			break;  		} -		error = xfs_reflink_end_cow(ip, ioend->io_offset, -				ioend->io_size); -		if (error) -			goto done; + +		goto done;  	}  	/* -	 * For unwritten extents we need to issue transactions to convert a -	 * range to normal written extens after the data I/O has finished. -	 * Detecting and handling completion IO errors is done individually -	 * for each case as different cleanup operations need to be performed -	 * on error. +	 * Success:  commit the COW or unwritten blocks if needed.  	 
*/ -	if (ioend->io_type == XFS_IO_UNWRITTEN) { -		if (error) -			goto done; -		error = xfs_iomap_write_unwritten(ip, ioend->io_offset, -						  ioend->io_size); -	} else if (ioend->io_append_trans) { -		error = xfs_setfilesize_ioend(ioend, error); -	} else { -		ASSERT(!xfs_ioend_is_append(ioend) || -		       ioend->io_type == XFS_IO_COW); +	switch (ioend->io_type) { +	case XFS_IO_COW: +		error = xfs_reflink_end_cow(ip, offset, size); +		break; +	case XFS_IO_UNWRITTEN: +		error = xfs_iomap_write_unwritten(ip, offset, size); +		break; +	default: +		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); +		break;  	}  done: +	if (ioend->io_append_trans) +		error = xfs_setfilesize_ioend(ioend, error);  	xfs_destroy_ioend(ioend, error);  } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 7234b9748c36..3531f8f72fa5 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks(  	xfs_ilock(ip, XFS_IOLOCK_EXCL);  	xfs_ilock(ip, XFS_MMAPLOCK_EXCL); -	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); +	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);  	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);  	xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index edfa6a55b064..7eaf1ef74e3c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1615,7 +1615,7 @@ xfs_itruncate_extents(  	/* Remove all pending CoW reservations. */  	error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, -			last_block); +			last_block, true);  	if (error)  		goto out; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 41662fb14e87..288ee5b840d7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -630,6 +630,11 @@ retry:  		goto out_unlock;  	} +	/* +	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch +	 * them out if the write happens to fail. 
+	 */ +	iomap->flags = IOMAP_F_NEW;  	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);  done:  	if (isnullstartblock(got.br_startblock)) @@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc(  	struct xfs_inode	*ip,  	loff_t			offset,  	loff_t			length, -	ssize_t			written) +	ssize_t			written, +	struct iomap		*iomap)  {  	struct xfs_mount	*mp = ip->i_mount;  	xfs_fileoff_t		start_fsb;  	xfs_fileoff_t		end_fsb;  	int			error = 0; -	/* behave as if the write failed if drop writes is enabled */ -	if (xfs_mp_drop_writes(mp)) +	/* +	 * Behave as if the write failed if drop writes is enabled. Set the NEW +	 * flag to force delalloc cleanup. +	 */ +	if (xfs_mp_drop_writes(mp)) { +		iomap->flags |= IOMAP_F_NEW;  		written = 0; +	}  	/*  	 * start_fsb refers to the first unused block after a short write. If @@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc(  	end_fsb = XFS_B_TO_FSB(mp, offset + length);  	/* -	 * Trim back delalloc blocks if we didn't manage to write the whole -	 * range reserved. +	 * Trim delalloc blocks if they were allocated by this write and we +	 * didn't manage to write the whole range.  	 *  	 * We don't need to care about racing delalloc as we hold i_mutex  	 * across the reserve/allocate/unreserve calls. If there are delalloc  	 * blocks in the range, they are ours.  	 
*/ -	if (start_fsb < end_fsb) { +	if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {  		truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),  					 XFS_FSB_TO_B(mp, end_fsb) - 1); @@ -1131,7 +1142,7 @@ xfs_file_iomap_end(  {  	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)  		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, -				length, written); +				length, written, iomap);  	return 0;  } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 66e881790c17..2a6d9b1558e0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -361,7 +361,6 @@ xfs_bulkstat(  	xfs_agino_t		agino;	/* inode # in allocation group */  	xfs_agnumber_t		agno;	/* allocation group number */  	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */ -	size_t			irbsize; /* size of irec buffer in bytes */  	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */  	int			nirbuf;	/* size of irbuf */  	int			ubcount; /* size of user's buffer */ @@ -388,11 +387,10 @@ xfs_bulkstat(  	*ubcountp = 0;  	*done = 0; -	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); +	irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);  	if (!irbuf)  		return -ENOMEM; - -	nirbuf = irbsize / sizeof(*irbuf); +	nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);  	/*  	 * Loop over the allocation groups, starting from the last diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 450bde68bb75..688ebff1f663 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -513,8 +513,7 @@ STATIC void  xfs_set_inoalignment(xfs_mount_t *mp)  {  	if (xfs_sb_version_hasalign(&mp->m_sb) && -	    mp->m_sb.sb_inoalignmt >= -	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) +		mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))  		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;  	else  		mp->m_inoalign_mask = 0; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index da6d08fb359c..4a84c5ea266d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ 
-548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow(  }  /* - * Cancel all pending CoW reservations for some block range of an inode. + * Cancel CoW reservations for some block range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared.   */  int  xfs_reflink_cancel_cow_blocks(  	struct xfs_inode		*ip,  	struct xfs_trans		**tpp,  	xfs_fileoff_t			offset_fsb, -	xfs_fileoff_t			end_fsb) +	xfs_fileoff_t			end_fsb, +	bool				cancel_real)  {  	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);  	struct xfs_bmbt_irec		got, del; @@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks(  					&idx, &got, &del);  			if (error)  				break; -		} else { +		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {  			xfs_trans_ijoin(*tpp, ip, 0);  			xfs_defer_init(&dfops, &firstfsb); @@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks(  }  /* - * Cancel all pending CoW reservations for some byte range of an inode. + * Cancel CoW reservations for some byte range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared.   */  int  xfs_reflink_cancel_cow_range(  	struct xfs_inode	*ip,  	xfs_off_t		offset, -	xfs_off_t		count) +	xfs_off_t		count, +	bool			cancel_real)  {  	struct xfs_trans	*tp;  	xfs_fileoff_t		offset_fsb; @@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range(  	xfs_trans_ijoin(tp, ip, 0);  	/* Scrape out the old CoW reservations */ -	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); +	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, +			cancel_real);  	if (error)  		goto out_cancel; @@ -1450,7 +1459,7 @@ next:  	 * We didn't find any shared blocks so turn off the reflink flag.  	 * First, get rid of any leftover CoW mappings.  	 
*/ -	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); +	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);  	if (error)  		return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 33ac9b8db683..d29a7967f029 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,  extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,  		struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, -		xfs_fileoff_t end_fsb); +		xfs_fileoff_t end_fsb, bool cancel_real);  extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, -		xfs_off_t count); +		xfs_off_t count, bool cancel_real);  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,  		xfs_off_t count);  extern int xfs_reflink_recover_cow(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 890862f2447c..685c042a120f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -953,7 +953,7 @@ xfs_fs_destroy_inode(  	XFS_STATS_INC(ip->i_mount, vn_remove);  	if (xfs_is_reflink_inode(ip)) { -		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); +		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);  		if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))  			xfs_warn(ip->i_mount,  "Error %d while evicting CoW blocks for inode %llu.", |