diff options
Diffstat (limited to 'fs/xfs/xfs_exchmaps_item.c')
| -rw-r--r-- | fs/xfs/xfs_exchmaps_item.c | 614 | 
1 files changed, 614 insertions, 0 deletions
diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c new file mode 100644 index 000000000000..264a121c5e16 --- /dev/null +++ b/fs/xfs/xfs_exchmaps_item.c @@ -0,0 +1,614 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2020-2024 Oracle.  All Rights Reserved. + * Author: Darrick J. Wong <[email protected]> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_shared.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_exchmaps_item.h" +#include "xfs_exchmaps.h" +#include "xfs_log.h" +#include "xfs_bmap.h" +#include "xfs_icache.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_error.h" +#include "xfs_log_priv.h" +#include "xfs_log_recover.h" +#include "xfs_exchrange.h" +#include "xfs_trace.h" + +struct kmem_cache	*xfs_xmi_cache; +struct kmem_cache	*xfs_xmd_cache; + +static const struct xfs_item_ops xfs_xmi_item_ops; + +static inline struct xfs_xmi_log_item *XMI_ITEM(struct xfs_log_item *lip) +{ +	return container_of(lip, struct xfs_xmi_log_item, xmi_item); +} + +STATIC void +xfs_xmi_item_free( +	struct xfs_xmi_log_item	*xmi_lip) +{ +	kvfree(xmi_lip->xmi_item.li_lv_shadow); +	kmem_cache_free(xfs_xmi_cache, xmi_lip); +} + +/* + * Freeing the XMI requires that we remove it from the AIL if it has already + * been placed there. However, the XMI may not yet have been placed in the AIL + * when called by xfs_xmi_release() from XMD processing due to the ordering of + * committed vs unpin operations in bulk insert operations. Hence the reference + * count to ensure only the last caller frees the XMI. + */ +STATIC void +xfs_xmi_release( +	struct xfs_xmi_log_item	*xmi_lip) +{ +	ASSERT(atomic_read(&xmi_lip->xmi_refcount) > 0); +	if (atomic_dec_and_test(&xmi_lip->xmi_refcount)) { +		xfs_trans_ail_delete(&xmi_lip->xmi_item, 0); +		xfs_xmi_item_free(xmi_lip); +	} +} + + +STATIC void +xfs_xmi_item_size( +	struct xfs_log_item	*lip, +	int			*nvecs, +	int			*nbytes) +{ +	*nvecs += 1; +	*nbytes += sizeof(struct xfs_xmi_log_format); +} + +/* + * This is called to fill in the vector of log iovecs for the given xmi log + * item. We use only 1 iovec, and we point that at the xmi_log_format structure + * embedded in the xmi item. + */ +STATIC void +xfs_xmi_item_format( +	struct xfs_log_item	*lip, +	struct xfs_log_vec	*lv) +{ +	struct xfs_xmi_log_item	*xmi_lip = XMI_ITEM(lip); +	struct xfs_log_iovec	*vecp = NULL; + +	xmi_lip->xmi_format.xmi_type = XFS_LI_XMI; +	xmi_lip->xmi_format.xmi_size = 1; + +	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMI_FORMAT, +			&xmi_lip->xmi_format, +			sizeof(struct xfs_xmi_log_format)); +} + +/* + * The unpin operation is the last place an XMI is manipulated in the log. It + * is either inserted in the AIL or aborted in the event of a log I/O error. In + * either case, the XMI transaction has been successfully committed to make it + * this far. Therefore, we expect whoever committed the XMI to either construct + * and commit the XMD or drop the XMD's reference in the event of error. Simply + * drop the log's XMI reference now that the log is done with it. + */ +STATIC void +xfs_xmi_item_unpin( +	struct xfs_log_item	*lip, +	int			remove) +{ +	struct xfs_xmi_log_item	*xmi_lip = XMI_ITEM(lip); + +	xfs_xmi_release(xmi_lip); +} + +/* + * The XMI has been either committed or aborted if the transaction has been + * cancelled. If the transaction was cancelled, an XMD isn't going to be + * constructed and thus we free the XMI here directly. + */ +STATIC void +xfs_xmi_item_release( +	struct xfs_log_item	*lip) +{ +	xfs_xmi_release(XMI_ITEM(lip)); +} + +/* Allocate and initialize an xmi item. */ +STATIC struct xfs_xmi_log_item * +xfs_xmi_init( +	struct xfs_mount	*mp) + +{ +	struct xfs_xmi_log_item	*xmi_lip; + +	xmi_lip = kmem_cache_zalloc(xfs_xmi_cache, GFP_KERNEL | __GFP_NOFAIL); + +	xfs_log_item_init(mp, &xmi_lip->xmi_item, XFS_LI_XMI, &xfs_xmi_item_ops); +	xmi_lip->xmi_format.xmi_id = (uintptr_t)(void *)xmi_lip; +	atomic_set(&xmi_lip->xmi_refcount, 2); + +	return xmi_lip; +} + +static inline struct xfs_xmd_log_item *XMD_ITEM(struct xfs_log_item *lip) +{ +	return container_of(lip, struct xfs_xmd_log_item, xmd_item); +} + +STATIC void +xfs_xmd_item_size( +	struct xfs_log_item	*lip, +	int			*nvecs, +	int			*nbytes) +{ +	*nvecs += 1; +	*nbytes += sizeof(struct xfs_xmd_log_format); +} + +/* + * This is called to fill in the vector of log iovecs for the given xmd log + * item. We use only 1 iovec, and we point that at the xmd_log_format structure + * embedded in the xmd item. + */ +STATIC void +xfs_xmd_item_format( +	struct xfs_log_item	*lip, +	struct xfs_log_vec	*lv) +{ +	struct xfs_xmd_log_item	*xmd_lip = XMD_ITEM(lip); +	struct xfs_log_iovec	*vecp = NULL; + +	xmd_lip->xmd_format.xmd_type = XFS_LI_XMD; +	xmd_lip->xmd_format.xmd_size = 1; + +	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format, +			sizeof(struct xfs_xmd_log_format)); +} + +/* + * The XMD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the XMI and free the + * XMD. + */ +STATIC void +xfs_xmd_item_release( +	struct xfs_log_item	*lip) +{ +	struct xfs_xmd_log_item	*xmd_lip = XMD_ITEM(lip); + +	xfs_xmi_release(xmd_lip->xmd_intent_log_item); +	kvfree(xmd_lip->xmd_item.li_lv_shadow); +	kmem_cache_free(xfs_xmd_cache, xmd_lip); +} + +static struct xfs_log_item * +xfs_xmd_item_intent( +	struct xfs_log_item	*lip) +{ +	return &XMD_ITEM(lip)->xmd_intent_log_item->xmi_item; +} + +static const struct xfs_item_ops xfs_xmd_item_ops = { +	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED | +			  XFS_ITEM_INTENT_DONE, +	.iop_size	= xfs_xmd_item_size, +	.iop_format	= xfs_xmd_item_format, +	.iop_release	= xfs_xmd_item_release, +	.iop_intent	= xfs_xmd_item_intent, +}; + +/* Log file mapping exchange information in the intent item. */ +STATIC struct xfs_log_item * +xfs_exchmaps_create_intent( +	struct xfs_trans		*tp, +	struct list_head		*items, +	unsigned int			count, +	bool				sort) +{ +	struct xfs_xmi_log_item		*xmi_lip; +	struct xfs_exchmaps_intent	*xmi; +	struct xfs_xmi_log_format	*xlf; + +	ASSERT(count == 1); + +	xmi = list_first_entry_or_null(items, struct xfs_exchmaps_intent, +			xmi_list); + +	xmi_lip = xfs_xmi_init(tp->t_mountp); +	xlf = &xmi_lip->xmi_format; + +	xlf->xmi_inode1 = xmi->xmi_ip1->i_ino; +	xlf->xmi_igen1 = VFS_I(xmi->xmi_ip1)->i_generation; +	xlf->xmi_inode2 = xmi->xmi_ip2->i_ino; +	xlf->xmi_igen2 = VFS_I(xmi->xmi_ip2)->i_generation; +	xlf->xmi_startoff1 = xmi->xmi_startoff1; +	xlf->xmi_startoff2 = xmi->xmi_startoff2; +	xlf->xmi_blockcount = xmi->xmi_blockcount; +	xlf->xmi_isize1 = xmi->xmi_isize1; +	xlf->xmi_isize2 = xmi->xmi_isize2; +	xlf->xmi_flags = xmi->xmi_flags & XFS_EXCHMAPS_LOGGED_FLAGS; + +	return &xmi_lip->xmi_item; +} + +STATIC struct xfs_log_item * +xfs_exchmaps_create_done( +	struct xfs_trans		*tp, +	struct xfs_log_item		*intent, +	unsigned int			count) +{ +	struct xfs_xmi_log_item		*xmi_lip = XMI_ITEM(intent); +	struct xfs_xmd_log_item		*xmd_lip; + +	xmd_lip = kmem_cache_zalloc(xfs_xmd_cache, GFP_KERNEL | __GFP_NOFAIL); +	xfs_log_item_init(tp->t_mountp, &xmd_lip->xmd_item, XFS_LI_XMD, +			  &xfs_xmd_item_ops); +	xmd_lip->xmd_intent_log_item = xmi_lip; +	xmd_lip->xmd_format.xmd_xmi_id = xmi_lip->xmi_format.xmi_id; + +	return &xmd_lip->xmd_item; +} + +/* Add this deferred XMI to the transaction. */ +void +xfs_exchmaps_defer_add( +	struct xfs_trans		*tp, +	struct xfs_exchmaps_intent	*xmi) +{ +	trace_xfs_exchmaps_defer(tp->t_mountp, xmi); + +	xfs_defer_add(tp, &xmi->xmi_list, &xfs_exchmaps_defer_type); +} + +static inline struct xfs_exchmaps_intent *xmi_entry(const struct list_head *e) +{ +	return list_entry(e, struct xfs_exchmaps_intent, xmi_list); +} + +/* Cancel a deferred file mapping exchange. */ +STATIC void +xfs_exchmaps_cancel_item( +	struct list_head		*item) +{ +	struct xfs_exchmaps_intent	*xmi = xmi_entry(item); + +	kmem_cache_free(xfs_exchmaps_intent_cache, xmi); +} + +/* Process a deferred file mapping exchange. */ +STATIC int +xfs_exchmaps_finish_item( +	struct xfs_trans		*tp, +	struct xfs_log_item		*done, +	struct list_head		*item, +	struct xfs_btree_cur		**state) +{ +	struct xfs_exchmaps_intent	*xmi = xmi_entry(item); +	int				error; + +	/* +	 * Exchange one more mappings between two files.  If there's still more +	 * work to do, we want to requeue ourselves after all other pending +	 * deferred operations have finished.  This includes all of the dfops +	 * that we queued directly as well as any new ones created in the +	 * process of finishing the others.  Doing so prevents us from queuing +	 * a large number of XMI log items in kernel memory, which in turn +	 * prevents us from pinning the tail of the log (while logging those +	 * new XMI items) until the first XMI items can be processed. +	 */ +	error = xfs_exchmaps_finish_one(tp, xmi); +	if (error != -EAGAIN) +		xfs_exchmaps_cancel_item(item); +	return error; +} + +/* Abort all pending XMIs. */ +STATIC void +xfs_exchmaps_abort_intent( +	struct xfs_log_item		*intent) +{ +	xfs_xmi_release(XMI_ITEM(intent)); +} + +/* Is this recovered XMI ok? */ +static inline bool +xfs_xmi_validate( +	struct xfs_mount		*mp, +	struct xfs_xmi_log_item		*xmi_lip) +{ +	struct xfs_xmi_log_format	*xlf = &xmi_lip->xmi_format; + +	if (!xfs_has_exchange_range(mp)) +		return false; + +	if (xmi_lip->xmi_format.__pad != 0) +		return false; + +	if (xlf->xmi_flags & ~XFS_EXCHMAPS_LOGGED_FLAGS) +		return false; + +	if (!xfs_verify_ino(mp, xlf->xmi_inode1) || +	    !xfs_verify_ino(mp, xlf->xmi_inode2)) +		return false; + +	if (!xfs_verify_fileext(mp, xlf->xmi_startoff1, xlf->xmi_blockcount)) +		return false; + +	return xfs_verify_fileext(mp, xlf->xmi_startoff2, xlf->xmi_blockcount); +} + +/* + * Use the recovered log state to create a new request, estimate resource + * requirements, and create a new incore intent state. + */ +STATIC struct xfs_exchmaps_intent * +xfs_xmi_item_recover_intent( +	struct xfs_mount		*mp, +	struct xfs_defer_pending	*dfp, +	const struct xfs_xmi_log_format	*xlf, +	struct xfs_exchmaps_req		*req, +	struct xfs_inode		**ipp1, +	struct xfs_inode		**ipp2) +{ +	struct xfs_inode		*ip1, *ip2; +	struct xfs_exchmaps_intent	*xmi; +	int				error; + +	/* +	 * Grab both inodes and set IRECOVERY to prevent trimming of post-eof +	 * mappings and freeing of unlinked inodes until we're totally done +	 * processing files.  The ondisk format of this new log item contains +	 * file handle information, which is why recovery for other items do +	 * not check the inode generation number. +	 */ +	error = xlog_recover_iget_handle(mp, xlf->xmi_inode1, xlf->xmi_igen1, +			&ip1); +	if (error) { +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf, +				sizeof(*xlf)); +		return ERR_PTR(error); +	} + +	error = xlog_recover_iget_handle(mp, xlf->xmi_inode2, xlf->xmi_igen2, +			&ip2); +	if (error) { +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf, +				sizeof(*xlf)); +		goto err_rele1; +	} + +	req->ip1 = ip1; +	req->ip2 = ip2; +	req->startoff1 = xlf->xmi_startoff1; +	req->startoff2 = xlf->xmi_startoff2; +	req->blockcount = xlf->xmi_blockcount; +	req->flags = xlf->xmi_flags & XFS_EXCHMAPS_PARAMS; + +	xfs_exchrange_ilock(NULL, ip1, ip2); +	error = xfs_exchmaps_estimate(req); +	xfs_exchrange_iunlock(ip1, ip2); +	if (error) +		goto err_rele2; + +	*ipp1 = ip1; +	*ipp2 = ip2; +	xmi = xfs_exchmaps_init_intent(req); +	xfs_defer_add_item(dfp, &xmi->xmi_list); +	return xmi; + +err_rele2: +	xfs_irele(ip2); +err_rele1: +	xfs_irele(ip1); +	req->ip2 = req->ip1 = NULL; +	return ERR_PTR(error); +} + +/* Process a file mapping exchange item that was recovered from the log. */ +STATIC int +xfs_exchmaps_recover_work( +	struct xfs_defer_pending	*dfp, +	struct list_head		*capture_list) +{ +	struct xfs_exchmaps_req		req = { .flags = 0 }; +	struct xfs_trans_res		resv; +	struct xfs_exchmaps_intent	*xmi; +	struct xfs_log_item		*lip = dfp->dfp_intent; +	struct xfs_xmi_log_item		*xmi_lip = XMI_ITEM(lip); +	struct xfs_mount		*mp = lip->li_log->l_mp; +	struct xfs_trans		*tp; +	struct xfs_inode		*ip1, *ip2; +	int				error = 0; + +	if (!xfs_xmi_validate(mp, xmi_lip)) { +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, +				&xmi_lip->xmi_format, +				sizeof(xmi_lip->xmi_format)); +		return -EFSCORRUPTED; +	} + +	xmi = xfs_xmi_item_recover_intent(mp, dfp, &xmi_lip->xmi_format, &req, +			&ip1, &ip2); +	if (IS_ERR(xmi)) +		return PTR_ERR(xmi); + +	trace_xfs_exchmaps_recover(mp, xmi); + +	resv = xlog_recover_resv(&M_RES(mp)->tr_write); +	error = xfs_trans_alloc(mp, &resv, req.resblks, 0, 0, &tp); +	if (error) +		goto err_rele; + +	xfs_exchrange_ilock(tp, ip1, ip2); + +	xfs_exchmaps_ensure_reflink(tp, xmi); +	xfs_exchmaps_upgrade_extent_counts(tp, xmi); +	error = xlog_recover_finish_intent(tp, dfp); +	if (error == -EFSCORRUPTED) +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, +				&xmi_lip->xmi_format, +				sizeof(xmi_lip->xmi_format)); +	if (error) +		goto err_cancel; + +	/* +	 * Commit transaction, which frees the transaction and saves the inodes +	 * for later replay activities. +	 */ +	error = xfs_defer_ops_capture_and_commit(tp, capture_list); +	goto err_unlock; + +err_cancel: +	xfs_trans_cancel(tp); +err_unlock: +	xfs_exchrange_iunlock(ip1, ip2); +err_rele: +	xfs_irele(ip2); +	xfs_irele(ip1); +	return error; +} + +/* Relog an intent item to push the log tail forward. */ +static struct xfs_log_item * +xfs_exchmaps_relog_intent( +	struct xfs_trans		*tp, +	struct xfs_log_item		*intent, +	struct xfs_log_item		*done_item) +{ +	struct xfs_xmi_log_item		*xmi_lip; +	struct xfs_xmi_log_format	*old_xlf, *new_xlf; + +	old_xlf = &XMI_ITEM(intent)->xmi_format; + +	xmi_lip = xfs_xmi_init(tp->t_mountp); +	new_xlf = &xmi_lip->xmi_format; + +	new_xlf->xmi_inode1	= old_xlf->xmi_inode1; +	new_xlf->xmi_inode2	= old_xlf->xmi_inode2; +	new_xlf->xmi_igen1	= old_xlf->xmi_igen1; +	new_xlf->xmi_igen2	= old_xlf->xmi_igen2; +	new_xlf->xmi_startoff1	= old_xlf->xmi_startoff1; +	new_xlf->xmi_startoff2	= old_xlf->xmi_startoff2; +	new_xlf->xmi_blockcount	= old_xlf->xmi_blockcount; +	new_xlf->xmi_flags	= old_xlf->xmi_flags; +	new_xlf->xmi_isize1	= old_xlf->xmi_isize1; +	new_xlf->xmi_isize2	= old_xlf->xmi_isize2; + +	return &xmi_lip->xmi_item; +} + +const struct xfs_defer_op_type xfs_exchmaps_defer_type = { +	.name		= "exchmaps", +	.max_items	= 1, +	.create_intent	= xfs_exchmaps_create_intent, +	.abort_intent	= xfs_exchmaps_abort_intent, +	.create_done	= xfs_exchmaps_create_done, +	.finish_item	= xfs_exchmaps_finish_item, +	.cancel_item	= xfs_exchmaps_cancel_item, +	.recover_work	= xfs_exchmaps_recover_work, +	.relog_intent	= xfs_exchmaps_relog_intent, +}; + +STATIC bool +xfs_xmi_item_match( +	struct xfs_log_item	*lip, +	uint64_t		intent_id) +{ +	return XMI_ITEM(lip)->xmi_format.xmi_id == intent_id; +} + +static const struct xfs_item_ops xfs_xmi_item_ops = { +	.flags		= XFS_ITEM_INTENT, +	.iop_size	= xfs_xmi_item_size, +	.iop_format	= xfs_xmi_item_format, +	.iop_unpin	= xfs_xmi_item_unpin, +	.iop_release	= xfs_xmi_item_release, +	.iop_match	= xfs_xmi_item_match, +}; + +/* + * This routine is called to create an in-core file mapping exchange item from + * the xmi format structure which was logged on disk.  It allocates an in-core + * xmi, copies the exchange information from the format structure into it, and + * adds the xmi to the AIL with the given LSN. + */ +STATIC int +xlog_recover_xmi_commit_pass2( +	struct xlog			*log, +	struct list_head		*buffer_list, +	struct xlog_recover_item	*item, +	xfs_lsn_t			lsn) +{ +	struct xfs_mount		*mp = log->l_mp; +	struct xfs_xmi_log_item		*xmi_lip; +	struct xfs_xmi_log_format	*xmi_formatp; +	size_t				len; + +	len = sizeof(struct xfs_xmi_log_format); +	if (item->ri_buf[0].i_len != len) { +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); +		return -EFSCORRUPTED; +	} + +	xmi_formatp = item->ri_buf[0].i_addr; +	if (xmi_formatp->__pad != 0) { +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); +		return -EFSCORRUPTED; +	} + +	xmi_lip = xfs_xmi_init(mp); +	memcpy(&xmi_lip->xmi_format, xmi_formatp, len); + +	xlog_recover_intent_item(log, &xmi_lip->xmi_item, lsn, +			&xfs_exchmaps_defer_type); +	return 0; +} + +const struct xlog_recover_item_ops xlog_xmi_item_ops = { +	.item_type		= XFS_LI_XMI, +	.commit_pass2		= xlog_recover_xmi_commit_pass2, +}; + +/* + * This routine is called when an XMD format structure is found in a committed + * transaction in the log. Its purpose is to cancel the corresponding XMI if it + * was still in the log. To do this it searches the AIL for the XMI with an id + * equal to that in the XMD format structure. If we find it we drop the XMD + * reference, which removes the XMI from the AIL and frees it. + */ +STATIC int +xlog_recover_xmd_commit_pass2( +	struct xlog			*log, +	struct list_head		*buffer_list, +	struct xlog_recover_item	*item, +	xfs_lsn_t			lsn) +{ +	struct xfs_xmd_log_format	*xmd_formatp; + +	xmd_formatp = item->ri_buf[0].i_addr; +	if (item->ri_buf[0].i_len != sizeof(struct xfs_xmd_log_format)) { +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); +		return -EFSCORRUPTED; +	} + +	xlog_recover_release_intent(log, XFS_LI_XMI, xmd_formatp->xmd_xmi_id); +	return 0; +} + +const struct xlog_recover_item_ops xlog_xmd_item_ops = { +	.item_type		= XFS_LI_XMD, +	.commit_pass2		= xlog_recover_xmd_commit_pass2, +};  |