diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
| -rw-r--r-- | fs/btrfs/tree-log.c | 134 | 
1 files changed, 108 insertions, 26 deletions
| diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1650dc44a5e3..0dba09334a16 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -205,14 +205,11 @@ static int join_running_log_trans(struct btrfs_root *root)   * until you call btrfs_end_log_trans() or it makes any future   * log transactions wait until you call btrfs_end_log_trans()   */ -int btrfs_pin_log_trans(struct btrfs_root *root) +void btrfs_pin_log_trans(struct btrfs_root *root)  { -	int ret = -ENOENT; -  	mutex_lock(&root->log_mutex);  	atomic_inc(&root->log_writers);  	mutex_unlock(&root->log_mutex); -	return ret;  }  /* @@ -258,6 +255,13 @@ struct walk_control {  	/* what stage of the replay code we're currently in */  	int stage; +	/* +	 * Ignore any items from the inode currently being processed. Needs +	 * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in +	 * the LOG_WALK_REPLAY_INODES stage. +	 */ +	bool ignore_cur_inode; +  	/* the root we are currently replaying */  	struct btrfs_root *replay_dest; @@ -2487,6 +2491,20 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,  			inode_item = btrfs_item_ptr(eb, i,  					    struct btrfs_inode_item); +			/* +			 * If we have a tmpfile (O_TMPFILE) that got fsync'ed +			 * and never got linked before the fsync, skip it, as +			 * replaying it is pointless since it would be deleted +			 * later. We skip logging tmpfiles, but it's always +			 * possible we are replaying a log created with a kernel +			 * that used to log tmpfiles. 
+			 */ +			if (btrfs_inode_nlink(eb, inode_item) == 0) { +				wc->ignore_cur_inode = true; +				continue; +			} else { +				wc->ignore_cur_inode = false; +			}  			ret = replay_xattr_deletes(wc->trans, root, log,  						   path, key.objectid);  			if (ret) @@ -2524,16 +2542,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,  					     root->fs_info->sectorsize);  				ret = btrfs_drop_extents(wc->trans, root, inode,  							 from, (u64)-1, 1); -				/* -				 * If the nlink count is zero here, the iput -				 * will free the inode.  We bump it to make -				 * sure it doesn't get freed until the link -				 * count fixup is done. -				 */  				if (!ret) { -					if (inode->i_nlink == 0) -						inc_nlink(inode); -					/* Update link count and nbytes. */ +					/* Update the inode's nbytes. */  					ret = btrfs_update_inode(wc->trans,  								 root, inode);  				} @@ -2548,6 +2558,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,  				break;  		} +		if (wc->ignore_cur_inode) +			continue; +  		if (key.type == BTRFS_DIR_INDEX_KEY &&  		    wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {  			ret = replay_one_dir_item(wc->trans, root, path, @@ -3196,9 +3209,12 @@ static void free_log_tree(struct btrfs_trans_handle *trans,  	};  	ret = walk_log_tree(trans, log, &wc); -	/* I don't think this can happen but just in case */ -	if (ret) -		btrfs_abort_transaction(trans, ret); +	if (ret) { +		if (trans) +			btrfs_abort_transaction(trans, ret); +		else +			btrfs_handle_fs_error(log->fs_info, ret, NULL); +	}  	while (1) {  		ret = find_first_extent_bit(&log->dirty_log_pages, @@ -5564,9 +5580,33 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,  			dir_inode = btrfs_iget(fs_info->sb, &inode_key,  					       root, NULL); -			/* If parent inode was deleted, skip it. 
*/ -			if (IS_ERR(dir_inode)) -				continue; +			/* +			 * If the parent inode was deleted, return an error to +			 * fallback to a transaction commit. This is to prevent +			 * getting an inode that was moved from one parent A to +			 * a parent B, got its former parent A deleted and then +			 * it got fsync'ed, from existing at both parents after +			 * a log replay (and the old parent still existing). +			 * Example: +			 * +			 * mkdir /mnt/A +			 * mkdir /mnt/B +			 * touch /mnt/B/bar +			 * sync +			 * mv /mnt/B/bar /mnt/A/bar +			 * mv -T /mnt/A /mnt/B +			 * fsync /mnt/B/bar +			 * <power fail> +			 * +			 * If we ignore the old parent B which got deleted, +			 * after a log replay we would have file bar linked +			 * at both parents and the old parent B would still +			 * exist. +			 */ +			if (IS_ERR(dir_inode)) { +				ret = PTR_ERR(dir_inode); +				goto out; +			}  			if (ctx)  				ctx->log_new_dentries = false; @@ -5640,7 +5680,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,  	if (ret)  		goto end_no_trans; -	if (btrfs_inode_in_log(inode, trans->transid)) { +	/* +	 * Skip already logged inodes or inodes corresponding to tmpfiles +	 * (since logging them is pointless, a link count of 0 means they +	 * will never be accessible). +	 */ +	if (btrfs_inode_in_log(inode, trans->transid) || +	    inode->vfs_inode.i_nlink == 0) {  		ret = BTRFS_NO_LOG_SYNC;  		goto end_no_trans;  	} @@ -6025,14 +6071,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,   * Call this after adding a new name for a file and it will properly   * update the log to reflect the new name.   * - * It will return zero if all goes well, and it will return 1 if a - * full transaction commit is required. + * @ctx can not be NULL when @sync_log is false, and should be NULL when it's + * true (because it's not used). + * + * Return value depends on whether @sync_log is true or false. 
+ * When true: returns BTRFS_NEED_TRANS_COMMIT if the transaction needs to be + *            committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT + *            otherwise. + * When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to + *             sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log, + *             or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be + *             committed (without attempting to sync the log).   */  int btrfs_log_new_name(struct btrfs_trans_handle *trans,  			struct btrfs_inode *inode, struct btrfs_inode *old_dir, -			struct dentry *parent) +			struct dentry *parent, +			bool sync_log, struct btrfs_log_ctx *ctx)  {  	struct btrfs_fs_info *fs_info = trans->fs_info; +	int ret;  	/*  	 * this will force the logging code to walk the dentry chain @@ -6047,9 +6104,34 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,  	 */  	if (inode->logged_trans <= fs_info->last_trans_committed &&  	    (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) -		return 0; +		return sync_log ? 
BTRFS_DONT_NEED_TRANS_COMMIT : +			BTRFS_DONT_NEED_LOG_SYNC; + +	if (sync_log) { +		struct btrfs_log_ctx ctx2; + +		btrfs_init_log_ctx(&ctx2, &inode->vfs_inode); +		ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, +					     LOG_INODE_EXISTS, &ctx2); +		if (ret == BTRFS_NO_LOG_SYNC) +			return BTRFS_DONT_NEED_TRANS_COMMIT; +		else if (ret) +			return BTRFS_NEED_TRANS_COMMIT; + +		ret = btrfs_sync_log(trans, inode->root, &ctx2); +		if (ret) +			return BTRFS_NEED_TRANS_COMMIT; +		return BTRFS_DONT_NEED_TRANS_COMMIT; +	} + +	ASSERT(ctx); +	ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, +				     LOG_INODE_EXISTS, ctx); +	if (ret == BTRFS_NO_LOG_SYNC) +		return BTRFS_DONT_NEED_LOG_SYNC; +	else if (ret) +		return BTRFS_NEED_TRANS_COMMIT; -	return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, -				      LOG_INODE_EXISTS, NULL); +	return BTRFS_NEED_LOG_SYNC;  } |