diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
| -rw-r--r-- | fs/btrfs/tree-log.c | 143 | 
1 files changed, 140 insertions, 3 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 24d03c751149..e692eea87af6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4415,6 +4415,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,  	return ret;  } +/* + * When we are logging a new inode X, check whether one of its references + * matches a reference of some other inode Y that was created in a past + * transaction and renamed in the current one. If we don't do this, then at + * log replay time we can lose inode Y (and all its files if it's a directory): + * + * mkdir /mnt/x + * echo "hello world" > /mnt/x/foobar + * sync + * mv /mnt/x /mnt/y + * mkdir /mnt/x                 # or touch /mnt/x + * xfs_io -c fsync /mnt/x + * <power fail> + * mount fs, trigger log replay + * + * After the log replay procedure, we would lose the first directory and all its + * files (file foobar). + * For the case where inode Y is not a directory we simply end up losing it: + * + * echo "123" > /mnt/foo + * sync + * mv /mnt/foo /mnt/bar + * echo "abc" > /mnt/foo + * xfs_io -c fsync /mnt/foo + * <power fail> + * + * We also need this for cases where a snapshot entry is replaced by some other + * entry (file or directory) otherwise we end up with an unreplayable log due to + * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as + * if it were a regular entry: + * + * mkdir /mnt/x + * btrfs subvolume snapshot /mnt /mnt/x/snap + * btrfs subvolume delete /mnt/x/snap + * rmdir /mnt/x + * mkdir /mnt/x + * fsync /mnt/x or fsync some new file inside it + * <power fail> + * + * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in + * the same transaction. + * + * Each name stored in the item at @slot of @eb (an inode ref item if @key's + * type is BTRFS_INODE_REF_KEY, an inode extref item otherwise) is looked up as + * a dir item of its parent directory in @inode's root, using a path that has + * search_commit_root and skip_locking set. + * + * Returns 1 as soon as one of the names has a dir item, 0 if none has, and a + * negative errno (-ENOMEM or the error returned by the lookup) on failure. 
+ */ +static int btrfs_check_ref_name_override(struct extent_buffer *eb, +					 const int slot, +					 const struct btrfs_key *key, +					 struct inode *inode) +{ +	int ret; +	struct btrfs_path *search_path; +	char *name = NULL; +	u32 name_len = 0; +	u32 item_size = btrfs_item_size_nr(eb, slot); +	u32 cur_offset = 0; +	unsigned long ptr = btrfs_item_ptr_offset(eb, slot); + +	search_path = btrfs_alloc_path(); +	if (!search_path) +		return -ENOMEM; +	search_path->search_commit_root = 1; +	search_path->skip_locking = 1; + +	while (cur_offset < item_size) { +		u64 parent; +		u32 this_name_len; +		u32 this_len; +		unsigned long name_ptr; +		struct btrfs_dir_item *di; + +		if (key->type == BTRFS_INODE_REF_KEY) { +			struct btrfs_inode_ref *iref; + +			iref = (struct btrfs_inode_ref *)(ptr + cur_offset); +			parent = key->offset; +			this_name_len = btrfs_inode_ref_name_len(eb, iref); +			name_ptr = (unsigned long)(iref + 1); +			this_len = sizeof(*iref) + this_name_len; +		} else { +			struct btrfs_inode_extref *extref; + +			extref = (struct btrfs_inode_extref *)(ptr + +							       cur_offset); +			parent = btrfs_inode_extref_parent(eb, extref); +			this_name_len = btrfs_inode_extref_name_len(eb, extref); +			name_ptr = (unsigned long)&extref->name; +			this_len = sizeof(*extref) + this_name_len; +		} + +		if (this_name_len > name_len) { +			char *new_name; + +			new_name = krealloc(name, this_name_len, GFP_NOFS); +			if (!new_name) { +				ret = -ENOMEM; +				goto out; +			} +			name_len = this_name_len; +			name = new_name; +		} + +		read_extent_buffer(eb, name, name_ptr, this_name_len); +		di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root, +					   search_path, parent, +					   name, this_name_len, 0); +		if (di && !IS_ERR(di)) { +			ret = 1; +			goto out; +		} else if (IS_ERR(di)) { +			ret = PTR_ERR(di); +			goto out; +		} +		btrfs_release_path(search_path); + +		cur_offset += this_len; +	} +	ret = 0; +out: +	btrfs_free_path(search_path); +	kfree(name); +	
return ret; +} +  /* log a single inode in the tree log.   * At least one parent directory for this inode must exist in the tree   * or be logged already. @@ -4602,6 +4723,22 @@ again:  		if (min_key.type == BTRFS_INODE_ITEM_KEY)  			need_log_inode_item = false; +		if ((min_key.type == BTRFS_INODE_REF_KEY || +		     min_key.type == BTRFS_INODE_EXTREF_KEY) && +		    BTRFS_I(inode)->generation == trans->transid) { +			ret = btrfs_check_ref_name_override(path->nodes[0], +							    path->slots[0], +							    &min_key, inode); +			if (ret < 0) { +				err = ret; +				goto out_unlock; +			} else if (ret > 0) { +				err = 1; +				btrfs_set_log_full_commit(root->fs_info, trans); +				goto out_unlock; +			} +		} +  		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */  		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {  			if (ins_nr == 0) @@ -4851,7 +4988,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,  			goto out;  	if (!S_ISDIR(inode->i_mode)) { -		if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) +		if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)  			goto out;  		inode = d_inode(parent);  	} @@ -4872,7 +5009,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,  			break;  		} -		if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) +		if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)  			break;  		if (IS_ROOT(parent)) @@ -5285,7 +5422,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,  	}  	while (1) { -		if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) +		if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)  			break;  		inode = d_inode(parent);  |