diff options
| author | Ingo Molnar <[email protected]> | 2016-10-16 11:31:39 +0200 | 
|---|---|---|
| committer | Ingo Molnar <[email protected]> | 2016-10-16 11:31:39 +0200 | 
| commit | 1d33369db25eb7f37b7a8bd22d736888b4501a9c (patch) | |
| tree | 116d764339be1bca928870151decbedc53a9e1d1 /fs/xfs/xfs_log_recover.c | |
| parent | 23446cb66c073b827779e5eb3dec301623299b32 (diff) | |
| parent | 1001354ca34179f3db924eb66672442a173147dc (diff) | |
Merge tag 'v4.9-rc1' into x86/urgent, to pick up updates
Signed-off-by: Ingo Molnar <[email protected]>
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
| -rw-r--r-- | fs/xfs/xfs_log_recover.c | 548 | 
1 file changed, 492 insertions(+), 56 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index e8638fd2c0c3..9b3d7c76915d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -44,6 +44,9 @@  #include "xfs_error.h"  #include "xfs_dir2.h"  #include "xfs_rmap_item.h" +#include "xfs_buf_item.h" +#include "xfs_refcount_item.h" +#include "xfs_bmap_item.h"  #define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1) @@ -381,6 +384,15 @@ xlog_recover_iodone(  						SHUTDOWN_META_IO_ERROR);  		}  	} + +	/* +	 * On v5 supers, a bli could be attached to update the metadata LSN. +	 * Clean it up. +	 */ +	if (bp->b_fspriv) +		xfs_buf_item_relse(bp); +	ASSERT(bp->b_fspriv == NULL); +  	bp->b_iodone = NULL;  	xfs_buf_ioend(bp);  } @@ -1914,6 +1926,10 @@ xlog_recover_reorder_trans(  		case XFS_LI_EFI:  		case XFS_LI_RUI:  		case XFS_LI_RUD: +		case XFS_LI_CUI: +		case XFS_LI_CUD: +		case XFS_LI_BUI: +		case XFS_LI_BUD:  			trace_xfs_log_recover_item_reorder_tail(log,  							trans, item, pass);  			list_move_tail(&item->ri_list, &inode_list); @@ -2232,6 +2248,7 @@ xlog_recover_get_buf_lsn(  	case XFS_ABTB_MAGIC:  	case XFS_ABTC_MAGIC:  	case XFS_RMAP_CRC_MAGIC: +	case XFS_REFC_CRC_MAGIC:  	case XFS_IBT_CRC_MAGIC:  	case XFS_IBT_MAGIC: {  		struct xfs_btree_block *btb = blk; @@ -2360,12 +2377,14 @@ static void  xlog_recover_validate_buf_type(  	struct xfs_mount	*mp,  	struct xfs_buf		*bp, -	xfs_buf_log_format_t	*buf_f) +	xfs_buf_log_format_t	*buf_f, +	xfs_lsn_t		current_lsn)  {  	struct xfs_da_blkinfo	*info = bp->b_addr;  	__uint32_t		magic32;  	__uint16_t		magic16;  	__uint16_t		magicda; +	char			*warnmsg = NULL;  	/*  	 * We can only do post recovery validation on items on CRC enabled @@ -2403,32 +2422,31 @@ xlog_recover_validate_buf_type(  		case XFS_RMAP_CRC_MAGIC:  			bp->b_ops = &xfs_rmapbt_buf_ops;  			break; +		case XFS_REFC_CRC_MAGIC: +			bp->b_ops = &xfs_refcountbt_buf_ops; +			break;  		default: -			xfs_warn(mp, "Bad btree block magic!"); -			ASSERT(0); +			warnmsg = "Bad btree block 
magic!";  			break;  		}  		break;  	case XFS_BLFT_AGF_BUF:  		if (magic32 != XFS_AGF_MAGIC) { -			xfs_warn(mp, "Bad AGF block magic!"); -			ASSERT(0); +			warnmsg = "Bad AGF block magic!";  			break;  		}  		bp->b_ops = &xfs_agf_buf_ops;  		break;  	case XFS_BLFT_AGFL_BUF:  		if (magic32 != XFS_AGFL_MAGIC) { -			xfs_warn(mp, "Bad AGFL block magic!"); -			ASSERT(0); +			warnmsg = "Bad AGFL block magic!";  			break;  		}  		bp->b_ops = &xfs_agfl_buf_ops;  		break;  	case XFS_BLFT_AGI_BUF:  		if (magic32 != XFS_AGI_MAGIC) { -			xfs_warn(mp, "Bad AGI block magic!"); -			ASSERT(0); +			warnmsg = "Bad AGI block magic!";  			break;  		}  		bp->b_ops = &xfs_agi_buf_ops; @@ -2438,8 +2456,7 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_GDQUOT_BUF:  #ifdef CONFIG_XFS_QUOTA  		if (magic16 != XFS_DQUOT_MAGIC) { -			xfs_warn(mp, "Bad DQUOT block magic!"); -			ASSERT(0); +			warnmsg = "Bad DQUOT block magic!";  			break;  		}  		bp->b_ops = &xfs_dquot_buf_ops; @@ -2451,16 +2468,14 @@ xlog_recover_validate_buf_type(  		break;  	case XFS_BLFT_DINO_BUF:  		if (magic16 != XFS_DINODE_MAGIC) { -			xfs_warn(mp, "Bad INODE block magic!"); -			ASSERT(0); +			warnmsg = "Bad INODE block magic!";  			break;  		}  		bp->b_ops = &xfs_inode_buf_ops;  		break;  	case XFS_BLFT_SYMLINK_BUF:  		if (magic32 != XFS_SYMLINK_MAGIC) { -			xfs_warn(mp, "Bad symlink block magic!"); -			ASSERT(0); +			warnmsg = "Bad symlink block magic!";  			break;  		}  		bp->b_ops = &xfs_symlink_buf_ops; @@ -2468,8 +2483,7 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_DIR_BLOCK_BUF:  		if (magic32 != XFS_DIR2_BLOCK_MAGIC &&  		    magic32 != XFS_DIR3_BLOCK_MAGIC) { -			xfs_warn(mp, "Bad dir block magic!"); -			ASSERT(0); +			warnmsg = "Bad dir block magic!";  			break;  		}  		bp->b_ops = &xfs_dir3_block_buf_ops; @@ -2477,8 +2491,7 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_DIR_DATA_BUF:  		if (magic32 != XFS_DIR2_DATA_MAGIC &&  		    magic32 != XFS_DIR3_DATA_MAGIC) { -			xfs_warn(mp, "Bad dir data 
magic!"); -			ASSERT(0); +			warnmsg = "Bad dir data magic!";  			break;  		}  		bp->b_ops = &xfs_dir3_data_buf_ops; @@ -2486,8 +2499,7 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_DIR_FREE_BUF:  		if (magic32 != XFS_DIR2_FREE_MAGIC &&  		    magic32 != XFS_DIR3_FREE_MAGIC) { -			xfs_warn(mp, "Bad dir3 free magic!"); -			ASSERT(0); +			warnmsg = "Bad dir3 free magic!";  			break;  		}  		bp->b_ops = &xfs_dir3_free_buf_ops; @@ -2495,8 +2507,7 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_DIR_LEAF1_BUF:  		if (magicda != XFS_DIR2_LEAF1_MAGIC &&  		    magicda != XFS_DIR3_LEAF1_MAGIC) { -			xfs_warn(mp, "Bad dir leaf1 magic!"); -			ASSERT(0); +			warnmsg = "Bad dir leaf1 magic!";  			break;  		}  		bp->b_ops = &xfs_dir3_leaf1_buf_ops; @@ -2504,8 +2515,7 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_DIR_LEAFN_BUF:  		if (magicda != XFS_DIR2_LEAFN_MAGIC &&  		    magicda != XFS_DIR3_LEAFN_MAGIC) { -			xfs_warn(mp, "Bad dir leafn magic!"); -			ASSERT(0); +			warnmsg = "Bad dir leafn magic!";  			break;  		}  		bp->b_ops = &xfs_dir3_leafn_buf_ops; @@ -2513,8 +2523,7 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_DA_NODE_BUF:  		if (magicda != XFS_DA_NODE_MAGIC &&  		    magicda != XFS_DA3_NODE_MAGIC) { -			xfs_warn(mp, "Bad da node magic!"); -			ASSERT(0); +			warnmsg = "Bad da node magic!";  			break;  		}  		bp->b_ops = &xfs_da3_node_buf_ops; @@ -2522,24 +2531,21 @@ xlog_recover_validate_buf_type(  	case XFS_BLFT_ATTR_LEAF_BUF:  		if (magicda != XFS_ATTR_LEAF_MAGIC &&  		    magicda != XFS_ATTR3_LEAF_MAGIC) { -			xfs_warn(mp, "Bad attr leaf magic!"); -			ASSERT(0); +			warnmsg = "Bad attr leaf magic!";  			break;  		}  		bp->b_ops = &xfs_attr3_leaf_buf_ops;  		break;  	case XFS_BLFT_ATTR_RMT_BUF:  		if (magic32 != XFS_ATTR3_RMT_MAGIC) { -			xfs_warn(mp, "Bad attr remote magic!"); -			ASSERT(0); +			warnmsg = "Bad attr remote magic!";  			break;  		}  		bp->b_ops = &xfs_attr3_rmt_buf_ops;  		break;  	case XFS_BLFT_SB_BUF:  		if (magic32 != 
XFS_SB_MAGIC) { -			xfs_warn(mp, "Bad SB block magic!"); -			ASSERT(0); +			warnmsg = "Bad SB block magic!";  			break;  		}  		bp->b_ops = &xfs_sb_buf_ops; @@ -2556,6 +2562,40 @@ xlog_recover_validate_buf_type(  			 xfs_blft_from_flags(buf_f));  		break;  	} + +	/* +	 * Nothing else to do in the case of a NULL current LSN as this means +	 * the buffer is more recent than the change in the log and will be +	 * skipped. +	 */ +	if (current_lsn == NULLCOMMITLSN) +		return; + +	if (warnmsg) { +		xfs_warn(mp, warnmsg); +		ASSERT(0); +	} + +	/* +	 * We must update the metadata LSN of the buffer as it is written out to +	 * ensure that older transactions never replay over this one and corrupt +	 * the buffer. This can occur if log recovery is interrupted at some +	 * point after the current transaction completes, at which point a +	 * subsequent mount starts recovery from the beginning. +	 * +	 * Write verifiers update the metadata LSN from log items attached to +	 * the buffer. Therefore, initialize a bli purely to carry the LSN to +	 * the verifier. We'll clean it up in our ->iodone() callback. 
+	 */ +	if (bp->b_ops) { +		struct xfs_buf_log_item	*bip; + +		ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); +		bp->b_iodone = xlog_recover_iodone; +		xfs_buf_item_init(bp, mp); +		bip = bp->b_fspriv; +		bip->bli_item.li_lsn = current_lsn; +	}  }  /* @@ -2569,7 +2609,8 @@ xlog_recover_do_reg_buffer(  	struct xfs_mount	*mp,  	xlog_recover_item_t	*item,  	struct xfs_buf		*bp, -	xfs_buf_log_format_t	*buf_f) +	xfs_buf_log_format_t	*buf_f, +	xfs_lsn_t		current_lsn)  {  	int			i;  	int			bit; @@ -2642,7 +2683,7 @@ xlog_recover_do_reg_buffer(  	/* Shouldn't be any more regions */  	ASSERT(i == item->ri_total); -	xlog_recover_validate_buf_type(mp, bp, buf_f); +	xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);  }  /* @@ -2685,7 +2726,7 @@ xlog_recover_do_dquot_buffer(  	if (log->l_quotaoffs_flag & type)  		return false; -	xlog_recover_do_reg_buffer(mp, item, bp, buf_f); +	xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);  	return true;  } @@ -2773,7 +2814,8 @@ xlog_recover_buffer_pass2(  	 */  	lsn = xlog_recover_get_buf_lsn(mp, bp);  	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { -		xlog_recover_validate_buf_type(mp, bp, buf_f); +		trace_xfs_log_recover_buf_skip(log, buf_f); +		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);  		goto out_release;  	} @@ -2789,7 +2831,7 @@ xlog_recover_buffer_pass2(  		if (!dirty)  			goto out_release;  	} else { -		xlog_recover_do_reg_buffer(mp, item, bp, buf_f); +		xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);  	}  	/* @@ -3515,6 +3557,242 @@ xlog_recover_rud_pass2(  }  /* + * Copy an CUI format buffer from the given buf, and into the destination + * CUI format structure.  The CUI/CUD items were designed not to need any + * special alignment handling. 
+ */ +static int +xfs_cui_copy_format( +	struct xfs_log_iovec		*buf, +	struct xfs_cui_log_format	*dst_cui_fmt) +{ +	struct xfs_cui_log_format	*src_cui_fmt; +	uint				len; + +	src_cui_fmt = buf->i_addr; +	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); + +	if (buf->i_len == len) { +		memcpy(dst_cui_fmt, src_cui_fmt, len); +		return 0; +	} +	return -EFSCORRUPTED; +} + +/* + * This routine is called to create an in-core extent refcount update + * item from the cui format structure which was logged on disk. + * It allocates an in-core cui, copies the extents from the format + * structure into it, and adds the cui to the AIL with the given + * LSN. + */ +STATIC int +xlog_recover_cui_pass2( +	struct xlog			*log, +	struct xlog_recover_item	*item, +	xfs_lsn_t			lsn) +{ +	int				error; +	struct xfs_mount		*mp = log->l_mp; +	struct xfs_cui_log_item		*cuip; +	struct xfs_cui_log_format	*cui_formatp; + +	cui_formatp = item->ri_buf[0].i_addr; + +	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); +	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); +	if (error) { +		xfs_cui_item_free(cuip); +		return error; +	} +	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); + +	spin_lock(&log->l_ailp->xa_lock); +	/* +	 * The CUI has two references. One for the CUD and one for CUI to ensure +	 * it makes it into the AIL. Insert the CUI into the AIL directly and +	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the +	 * AIL lock. +	 */ +	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn); +	xfs_cui_release(cuip); +	return 0; +} + + +/* + * This routine is called when an CUD format structure is found in a committed + * transaction in the log. Its purpose is to cancel the corresponding CUI if it + * was still in the log. To do this it searches the AIL for the CUI with an id + * equal to that in the CUD format structure. If we find it we drop the CUD + * reference, which removes the CUI from the AIL and frees it. 
+ */ +STATIC int +xlog_recover_cud_pass2( +	struct xlog			*log, +	struct xlog_recover_item	*item) +{ +	struct xfs_cud_log_format	*cud_formatp; +	struct xfs_cui_log_item		*cuip = NULL; +	struct xfs_log_item		*lip; +	__uint64_t			cui_id; +	struct xfs_ail_cursor		cur; +	struct xfs_ail			*ailp = log->l_ailp; + +	cud_formatp = item->ri_buf[0].i_addr; +	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) +		return -EFSCORRUPTED; +	cui_id = cud_formatp->cud_cui_id; + +	/* +	 * Search for the CUI with the id in the CUD format structure in the +	 * AIL. +	 */ +	spin_lock(&ailp->xa_lock); +	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); +	while (lip != NULL) { +		if (lip->li_type == XFS_LI_CUI) { +			cuip = (struct xfs_cui_log_item *)lip; +			if (cuip->cui_format.cui_id == cui_id) { +				/* +				 * Drop the CUD reference to the CUI. This +				 * removes the CUI from the AIL and frees it. +				 */ +				spin_unlock(&ailp->xa_lock); +				xfs_cui_release(cuip); +				spin_lock(&ailp->xa_lock); +				break; +			} +		} +		lip = xfs_trans_ail_cursor_next(ailp, &cur); +	} + +	xfs_trans_ail_cursor_done(&cur); +	spin_unlock(&ailp->xa_lock); + +	return 0; +} + +/* + * Copy an BUI format buffer from the given buf, and into the destination + * BUI format structure.  The BUI/BUD items were designed not to need any + * special alignment handling. + */ +static int +xfs_bui_copy_format( +	struct xfs_log_iovec		*buf, +	struct xfs_bui_log_format	*dst_bui_fmt) +{ +	struct xfs_bui_log_format	*src_bui_fmt; +	uint				len; + +	src_bui_fmt = buf->i_addr; +	len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); + +	if (buf->i_len == len) { +		memcpy(dst_bui_fmt, src_bui_fmt, len); +		return 0; +	} +	return -EFSCORRUPTED; +} + +/* + * This routine is called to create an in-core extent bmap update + * item from the bui format structure which was logged on disk. 
+ * It allocates an in-core bui, copies the extents from the format + * structure into it, and adds the bui to the AIL with the given + * LSN. + */ +STATIC int +xlog_recover_bui_pass2( +	struct xlog			*log, +	struct xlog_recover_item	*item, +	xfs_lsn_t			lsn) +{ +	int				error; +	struct xfs_mount		*mp = log->l_mp; +	struct xfs_bui_log_item		*buip; +	struct xfs_bui_log_format	*bui_formatp; + +	bui_formatp = item->ri_buf[0].i_addr; + +	if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) +		return -EFSCORRUPTED; +	buip = xfs_bui_init(mp); +	error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); +	if (error) { +		xfs_bui_item_free(buip); +		return error; +	} +	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); + +	spin_lock(&log->l_ailp->xa_lock); +	/* +	 * The RUI has two references. One for the RUD and one for RUI to ensure +	 * it makes it into the AIL. Insert the RUI into the AIL directly and +	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the +	 * AIL lock. +	 */ +	xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn); +	xfs_bui_release(buip); +	return 0; +} + + +/* + * This routine is called when an BUD format structure is found in a committed + * transaction in the log. Its purpose is to cancel the corresponding BUI if it + * was still in the log. To do this it searches the AIL for the BUI with an id + * equal to that in the BUD format structure. If we find it we drop the BUD + * reference, which removes the BUI from the AIL and frees it. 
+ */ +STATIC int +xlog_recover_bud_pass2( +	struct xlog			*log, +	struct xlog_recover_item	*item) +{ +	struct xfs_bud_log_format	*bud_formatp; +	struct xfs_bui_log_item		*buip = NULL; +	struct xfs_log_item		*lip; +	__uint64_t			bui_id; +	struct xfs_ail_cursor		cur; +	struct xfs_ail			*ailp = log->l_ailp; + +	bud_formatp = item->ri_buf[0].i_addr; +	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) +		return -EFSCORRUPTED; +	bui_id = bud_formatp->bud_bui_id; + +	/* +	 * Search for the BUI with the id in the BUD format structure in the +	 * AIL. +	 */ +	spin_lock(&ailp->xa_lock); +	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); +	while (lip != NULL) { +		if (lip->li_type == XFS_LI_BUI) { +			buip = (struct xfs_bui_log_item *)lip; +			if (buip->bui_format.bui_id == bui_id) { +				/* +				 * Drop the BUD reference to the BUI. This +				 * removes the BUI from the AIL and frees it. +				 */ +				spin_unlock(&ailp->xa_lock); +				xfs_bui_release(buip); +				spin_lock(&ailp->xa_lock); +				break; +			} +		} +		lip = xfs_trans_ail_cursor_next(ailp, &cur); +	} + +	xfs_trans_ail_cursor_done(&cur); +	spin_unlock(&ailp->xa_lock); + +	return 0; +} + +/*   * This routine is called when an inode create format structure is found in a   * committed transaction in the log.  It's purpose is to initialise the inodes   * being allocated on disk. 
This requires us to get inode cluster buffers that @@ -3741,6 +4019,10 @@ xlog_recover_ra_pass2(  	case XFS_LI_QUOTAOFF:  	case XFS_LI_RUI:  	case XFS_LI_RUD: +	case XFS_LI_CUI: +	case XFS_LI_CUD: +	case XFS_LI_BUI: +	case XFS_LI_BUD:  	default:  		break;  	} @@ -3766,6 +4048,10 @@ xlog_recover_commit_pass1(  	case XFS_LI_ICREATE:  	case XFS_LI_RUI:  	case XFS_LI_RUD: +	case XFS_LI_CUI: +	case XFS_LI_CUD: +	case XFS_LI_BUI: +	case XFS_LI_BUD:  		/* nothing to do in pass 1 */  		return 0;  	default: @@ -3800,6 +4086,14 @@ xlog_recover_commit_pass2(  		return xlog_recover_rui_pass2(log, item, trans->r_lsn);  	case XFS_LI_RUD:  		return xlog_recover_rud_pass2(log, item); +	case XFS_LI_CUI: +		return xlog_recover_cui_pass2(log, item, trans->r_lsn); +	case XFS_LI_CUD: +		return xlog_recover_cud_pass2(log, item); +	case XFS_LI_BUI: +		return xlog_recover_bui_pass2(log, item, trans->r_lsn); +	case XFS_LI_BUD: +		return xlog_recover_bud_pass2(log, item);  	case XFS_LI_DQUOT:  		return xlog_recover_dquot_pass2(log, buffer_list, item,  						trans->r_lsn); @@ -3846,14 +4140,13 @@ STATIC int  xlog_recover_commit_trans(  	struct xlog		*log,  	struct xlog_recover	*trans, -	int			pass) +	int			pass, +	struct list_head	*buffer_list)  {  	int				error = 0; -	int				error2;  	int				items_queued = 0;  	struct xlog_recover_item	*item;  	struct xlog_recover_item	*next; -	LIST_HEAD			(buffer_list);  	LIST_HEAD			(ra_list);  	LIST_HEAD			(done_list); @@ -3876,7 +4169,7 @@ xlog_recover_commit_trans(  			items_queued++;  			if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {  				error = xlog_recover_items_pass2(log, trans, -						&buffer_list, &ra_list); +						buffer_list, &ra_list);  				list_splice_tail_init(&ra_list, &done_list);  				items_queued = 0;  			} @@ -3894,15 +4187,14 @@ out:  	if (!list_empty(&ra_list)) {  		if (!error)  			error = xlog_recover_items_pass2(log, trans, -					&buffer_list, &ra_list); +					buffer_list, &ra_list);  		list_splice_tail_init(&ra_list, 
&done_list);  	}  	if (!list_empty(&done_list))  		list_splice_init(&done_list, &trans->r_itemq); -	error2 = xfs_buf_delwri_submit(&buffer_list); -	return error ? error : error2; +	return error;  }  STATIC void @@ -4085,7 +4377,8 @@ xlog_recovery_process_trans(  	char			*dp,  	unsigned int		len,  	unsigned int		flags, -	int			pass) +	int			pass, +	struct list_head	*buffer_list)  {  	int			error = 0;  	bool			freeit = false; @@ -4109,7 +4402,8 @@ xlog_recovery_process_trans(  		error = xlog_recover_add_to_cont_trans(log, trans, dp, len);  		break;  	case XLOG_COMMIT_TRANS: -		error = xlog_recover_commit_trans(log, trans, pass); +		error = xlog_recover_commit_trans(log, trans, pass, +						  buffer_list);  		/* success or fail, we are now done with this transaction. */  		freeit = true;  		break; @@ -4191,10 +4485,12 @@ xlog_recover_process_ophdr(  	struct xlog_op_header	*ohead,  	char			*dp,  	char			*end, -	int			pass) +	int			pass, +	struct list_head	*buffer_list)  {  	struct xlog_recover	*trans;  	unsigned int		len; +	int			error;  	/* Do we understand who wrote this op? */  	if (ohead->oh_clientid != XFS_TRANSACTION && @@ -4221,8 +4517,39 @@ xlog_recover_process_ophdr(  		return 0;  	} +	/* +	 * The recovered buffer queue is drained only once we know that all +	 * recovery items for the current LSN have been processed. This is +	 * required because: +	 * +	 * - Buffer write submission updates the metadata LSN of the buffer. +	 * - Log recovery skips items with a metadata LSN >= the current LSN of +	 *   the recovery item. +	 * - Separate recovery items against the same metadata buffer can share +	 *   a current LSN. I.e., consider that the LSN of a recovery item is +	 *   defined as the starting LSN of the first record in which its +	 *   transaction appears, that a record can hold multiple transactions, +	 *   and/or that a transaction can span multiple records. 
+	 * +	 * In other words, we are allowed to submit a buffer from log recovery +	 * once per current LSN. Otherwise, we may incorrectly skip recovery +	 * items and cause corruption. +	 * +	 * We don't know up front whether buffers are updated multiple times per +	 * LSN. Therefore, track the current LSN of each commit log record as it +	 * is processed and drain the queue when it changes. Use commit records +	 * because they are ordered correctly by the logging code. +	 */ +	if (log->l_recovery_lsn != trans->r_lsn && +	    ohead->oh_flags & XLOG_COMMIT_TRANS) { +		error = xfs_buf_delwri_submit(buffer_list); +		if (error) +			return error; +		log->l_recovery_lsn = trans->r_lsn; +	} +  	return xlog_recovery_process_trans(log, trans, dp, len, -					   ohead->oh_flags, pass); +					   ohead->oh_flags, pass, buffer_list);  }  /* @@ -4240,7 +4567,8 @@ xlog_recover_process_data(  	struct hlist_head	rhash[],  	struct xlog_rec_header	*rhead,  	char			*dp, -	int			pass) +	int			pass, +	struct list_head	*buffer_list)  {  	struct xlog_op_header	*ohead;  	char			*end; @@ -4254,6 +4582,7 @@ xlog_recover_process_data(  	if (xlog_header_check_recover(log->l_mp, rhead))  		return -EIO; +	trace_xfs_log_recover_record(log, rhead, pass);  	while ((dp < end) && num_logops) {  		ohead = (struct xlog_op_header *)dp; @@ -4262,7 +4591,7 @@ xlog_recover_process_data(  		/* errors will abort recovery */  		error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, -						    dp, end, pass); +						   dp, end, pass, buffer_list);  		if (error)  			return error; @@ -4352,12 +4681,94 @@ xlog_recover_cancel_rui(  	spin_lock(&ailp->xa_lock);  } +/* Recover the CUI if necessary. */ +STATIC int +xlog_recover_process_cui( +	struct xfs_mount		*mp, +	struct xfs_ail			*ailp, +	struct xfs_log_item		*lip) +{ +	struct xfs_cui_log_item		*cuip; +	int				error; + +	/* +	 * Skip CUIs that we've already processed. 
+	 */ +	cuip = container_of(lip, struct xfs_cui_log_item, cui_item); +	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)) +		return 0; + +	spin_unlock(&ailp->xa_lock); +	error = xfs_cui_recover(mp, cuip); +	spin_lock(&ailp->xa_lock); + +	return error; +} + +/* Release the CUI since we're cancelling everything. */ +STATIC void +xlog_recover_cancel_cui( +	struct xfs_mount		*mp, +	struct xfs_ail			*ailp, +	struct xfs_log_item		*lip) +{ +	struct xfs_cui_log_item		*cuip; + +	cuip = container_of(lip, struct xfs_cui_log_item, cui_item); + +	spin_unlock(&ailp->xa_lock); +	xfs_cui_release(cuip); +	spin_lock(&ailp->xa_lock); +} + +/* Recover the BUI if necessary. */ +STATIC int +xlog_recover_process_bui( +	struct xfs_mount		*mp, +	struct xfs_ail			*ailp, +	struct xfs_log_item		*lip) +{ +	struct xfs_bui_log_item		*buip; +	int				error; + +	/* +	 * Skip BUIs that we've already processed. +	 */ +	buip = container_of(lip, struct xfs_bui_log_item, bui_item); +	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)) +		return 0; + +	spin_unlock(&ailp->xa_lock); +	error = xfs_bui_recover(mp, buip); +	spin_lock(&ailp->xa_lock); + +	return error; +} + +/* Release the BUI since we're cancelling everything. */ +STATIC void +xlog_recover_cancel_bui( +	struct xfs_mount		*mp, +	struct xfs_ail			*ailp, +	struct xfs_log_item		*lip) +{ +	struct xfs_bui_log_item		*buip; + +	buip = container_of(lip, struct xfs_bui_log_item, bui_item); + +	spin_unlock(&ailp->xa_lock); +	xfs_bui_release(buip); +	spin_lock(&ailp->xa_lock); +} +  /* Is this log item a deferred action intent? 
*/  static inline bool xlog_item_is_intent(struct xfs_log_item *lip)  {  	switch (lip->li_type) {  	case XFS_LI_EFI:  	case XFS_LI_RUI: +	case XFS_LI_CUI: +	case XFS_LI_BUI:  		return true;  	default:  		return false; @@ -4421,6 +4832,12 @@ xlog_recover_process_intents(  		case XFS_LI_RUI:  			error = xlog_recover_process_rui(log->l_mp, ailp, lip);  			break; +		case XFS_LI_CUI: +			error = xlog_recover_process_cui(log->l_mp, ailp, lip); +			break; +		case XFS_LI_BUI: +			error = xlog_recover_process_bui(log->l_mp, ailp, lip); +			break;  		}  		if (error)  			goto out; @@ -4468,6 +4885,12 @@ xlog_recover_cancel_intents(  		case XFS_LI_RUI:  			xlog_recover_cancel_rui(log->l_mp, ailp, lip);  			break; +		case XFS_LI_CUI: +			xlog_recover_cancel_cui(log->l_mp, ailp, lip); +			break; +		case XFS_LI_BUI: +			xlog_recover_cancel_bui(log->l_mp, ailp, lip); +			break;  		}  		lip = xfs_trans_ail_cursor_next(ailp, &cur); @@ -4546,6 +4969,7 @@ xlog_recover_process_one_iunlink(  	if (error)  		goto fail_iput; +	xfs_iflags_clear(ip, XFS_IRECOVERY);  	ASSERT(VFS_I(ip)->i_nlink == 0);  	ASSERT(VFS_I(ip)->i_mode != 0); @@ -4685,7 +5109,8 @@ xlog_recover_process(  	struct hlist_head	rhash[],  	struct xlog_rec_header	*rhead,  	char			*dp, -	int			pass) +	int			pass, +	struct list_head	*buffer_list)  {  	int			error;  	__le32			crc; @@ -4732,7 +5157,8 @@ xlog_recover_process(  	if (error)  		return error; -	return xlog_recover_process_data(log, rhash, rhead, dp, pass); +	return xlog_recover_process_data(log, rhash, rhead, dp, pass, +					 buffer_list);  }  STATIC int @@ -4793,9 +5219,11 @@ xlog_do_recovery_pass(  	char			*offset;  	xfs_buf_t		*hbp, *dbp;  	int			error = 0, h_size, h_len; +	int			error2 = 0;  	int			bblks, split_bblks;  	int			hblks, split_hblks, wrapped_hblks;  	struct hlist_head	rhash[XLOG_RHASH_SIZE]; +	LIST_HEAD		(buffer_list);  	ASSERT(head_blk != tail_blk);  	rhead_blk = 0; @@ -4981,7 +5409,7 @@ xlog_do_recovery_pass(  			}  			error = 
xlog_recover_process(log, rhash, rhead, offset, -						     pass); +						     pass, &buffer_list);  			if (error)  				goto bread_err2; @@ -5012,7 +5440,8 @@ xlog_do_recovery_pass(  		if (error)  			goto bread_err2; -		error = xlog_recover_process(log, rhash, rhead, offset, pass); +		error = xlog_recover_process(log, rhash, rhead, offset, pass, +					     &buffer_list);  		if (error)  			goto bread_err2; @@ -5025,10 +5454,17 @@ xlog_do_recovery_pass(   bread_err1:  	xlog_put_bp(hbp); +	/* +	 * Submit buffers that have been added from the last record processed, +	 * regardless of error status. +	 */ +	if (!list_empty(&buffer_list)) +		error2 = xfs_buf_delwri_submit(&buffer_list); +  	if (error && first_bad)  		*first_bad = rhead_blk; -	return error; +	return error ? error : error2;  }  /*  |